summaryrefslogtreecommitdiff
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore13
-rw-r--r--tools/testing/selftests/net/Makefile283
-rw-r--r--tools/testing/selftests/net/af_unix/.gitignore8
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile14
-rw-r--r--tools/testing/selftests/net/af_unix/config2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c2
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c144
-rw-r--r--tools/testing/selftests/net/af_unix/scm_inq.c123
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c217
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c110
-rw-r--r--tools/testing/selftests/net/af_unix/so_peek_off.c162
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c2
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connreset.c180
-rwxr-xr-xtools/testing/selftests/net/amt.sh20
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh2
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh51
-rw-r--r--tools/testing/selftests/net/bench/Makefile7
-rw-r--r--tools/testing/selftests/net/bench/page_pool/Makefile17
-rw-r--r--tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c267
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.c394
-rw-r--r--tools/testing/selftests/net/bench/page_pool/time_bench.h238
-rwxr-xr-xtools/testing/selftests/net/bench/test_bench_page_pool.sh32
-rw-r--r--tools/testing/selftests/net/bind_bhash.c4
-rw-r--r--tools/testing/selftests/net/bind_timewait.c2
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c2
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py23
-rwxr-xr-xtools/testing/selftests/net/broadcast_ether_dst.sh83
-rwxr-xr-xtools/testing/selftests/net/broadcast_pmtu.sh47
-rwxr-xr-xtools/testing/selftests/net/busy_poll_test.sh26
-rw-r--r--tools/testing/selftests/net/busy_poller.c104
-rw-r--r--tools/testing/selftests/net/can/.gitignore2
-rw-r--r--tools/testing/selftests/net/can/Makefile11
-rw-r--r--tools/testing/selftests/net/can/config3
-rw-r--r--tools/testing/selftests/net/can/test_raw_filter.c405
-rwxr-xr-xtools/testing/selftests/net/can/test_raw_filter.sh45
-rwxr-xr-xtools/testing/selftests/net/cmsg_ip.sh187
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh154
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c135
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_priority.sh151
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh35
-rw-r--r--tools/testing/selftests/net/config131
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv4.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-ipv6.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-other.sh2
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh439
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh2
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh26
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh61
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh142
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh187
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile59
-rw-r--r--tools/testing/selftests/net/forwarding/README17
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_activity_notify.sh170
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh387
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh102
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh81
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh96
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh25
-rw-r--r--tools/testing/selftests/net/forwarding/config29
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh140
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh3
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_ets.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_core.sh9
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh12
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh46
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh26
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh15
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh766
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh347
-rwxr-xr-xtools/testing/selftests/net/gre_ipv6_lladdr.sh184
-rw-r--r--tools/testing/selftests/net/gro.c1328
-rwxr-xr-xtools/testing/selftests/net/gro.sh104
-rw-r--r--tools/testing/selftests/net/hsr/Makefile6
-rw-r--r--tools/testing/selftests/net/hsr/config4
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh2
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c24
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rwxr-xr-xtools/testing/selftests/net/ip_local_port_range.sh4
-rw-r--r--tools/testing/selftests/net/ipsec.c5
-rwxr-xr-xtools/testing/selftests/net/ipv6_force_forwarding.sh105
-rw-r--r--tools/testing/selftests/net/ipv6_fragmentation.c114
-rwxr-xr-xtools/testing/selftests/net/ipv6_route_update_soft_lockup.sh1
-rw-r--r--tools/testing/selftests/net/lib.sh226
-rw-r--r--tools/testing/selftests/net/lib/.gitignore1
-rw-r--r--tools/testing/selftests/net/lib/Makefile17
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rwxr-xr-xtools/testing/selftests/net/lib/ksft_setup_loopback.sh111
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py36
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py154
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py18
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py2
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py173
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py36
-rw-r--r--tools/testing/selftests/net/lib/sh/defer.sh20
-rw-r--r--tools/testing/selftests/net/lib/xdp_dummy.bpf.c (renamed from tools/testing/selftests/net/xdp_dummy.bpf.c)6
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c131
-rw-r--r--tools/testing/selftests/net/lib/xdp_native.bpf.c680
-rwxr-xr-xtools/testing/selftests/net/link_netns.py141
-rwxr-xr-xtools/testing/selftests/net/lwt_dst_cache_ref_loop.sh246
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore1
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile39
-rw-r--r--tools/testing/selftests/net/mptcp/config44
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh60
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c113
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh163
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh5
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh5
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c435
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c28
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh631
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh102
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c44
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh42
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c25
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh49
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh49
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c24
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh84
-rwxr-xr-xtools/testing/selftests/net/nat6to4.sh15
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rwxr-xr-xtools/testing/selftests/net/netdev-l2addr.sh59
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore1
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile86
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter_queue.sh7
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh2
-rw-r--r--tools/testing/selftests/net/netfilter/config49
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_clash.sh174
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c2
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh515
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh37
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh8
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh58
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh361
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh638
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh213
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_interface_stress.sh157
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh85
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh2
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh39
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh36
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c3
-rw-r--r--tools/testing/selftests/net/netfilter/udpclash.c158
-rw-r--r--tools/testing/selftests/net/netlink-dumps.c155
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh23
-rw-r--r--tools/testing/selftests/net/nettest.c12
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py160
-rw-r--r--tools/testing/selftests/net/openvswitch/Makefile2
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh105
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile34
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh108
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2387
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-large-mtu.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh117
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt6
-rw-r--r--tools/testing/selftests/net/packetdrill/Makefile10
-rw-r--r--tools/testing/selftests/net/packetdrill/config4
-rwxr-xr-xtools/testing/selftests/net/packetdrill/defaults.sh3
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh58
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt31
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt45
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt41
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt46
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt49
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt74
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt40
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt27
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt44
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt33
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt64
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt62
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt34
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt92
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt91
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt145
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt39
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt3
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt2
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt2
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh10
-rw-r--r--tools/testing/selftests/net/proc_net_pktgen.c690
-rw-r--r--tools/testing/selftests/net/psock_fanout.c2
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c8
-rw-r--r--tools/testing/selftests/net/rds/Makefile13
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c4
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c2
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c2
-rwxr-xr-xtools/testing/selftests/net/route_hint.sh79
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh12
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.py30
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh224
-rwxr-xr-xtools/testing/selftests/net/rtnetlink_notification.sh112
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c2
-rw-r--r--tools/testing/selftests/net/sctp_hello.c17
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh73
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh44
-rw-r--r--tools/testing/selftests/net/sk_so_peek_off.c2
-rw-r--r--tools/testing/selftests/net/skf_net_off.c244
-rwxr-xr-xtools/testing/selftests/net/skf_net_off.sh30
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c2
-rw-r--r--tools/testing/selftests/net/so_rcv_listener.c168
-rw-r--r--tools/testing/selftests/net/so_txtime.c2
-rw-r--r--tools/testing/selftests/net/socket.c13
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh5
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh4
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh79
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh133
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh76
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh85
-rw-r--r--tools/testing/selftests/net/tap.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/config2
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c58
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c22
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c17
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c76
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h114
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c7
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c2
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c315
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c75
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c47
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c21
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c32
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c118
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c2
-rw-r--r--tools/testing/selftests/net/tcp_port_share.c258
-rwxr-xr-xtools/testing/selftests/net/test_blackhole_dev.sh11
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh31
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh125
-rwxr-xr-xtools/testing/selftests/net/test_neigh.sh366
-rwxr-xr-xtools/testing/selftests/net/test_so_rcv.sh73
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_fdb_changelink.sh111
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nh.sh223
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh9
-rw-r--r--tools/testing/selftests/net/tfo.c171
-rwxr-xr-xtools/testing/selftests/net/tfo_passive.sh112
-rw-r--r--tools/testing/selftests/net/tls.c1104
-rw-r--r--tools/testing/selftests/net/toeplitz.c589
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh561
-rw-r--r--tools/testing/selftests/net/tun.c2
-rw-r--r--tools/testing/selftests/net/txtimestamp.c2
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh10
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh4
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh4
-rw-r--r--tools/testing/selftests/net/udpgso.c26
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh3
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c2
-rwxr-xr-xtools/testing/selftests/net/veth.sh2
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh258
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh98
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh4
-rw-r--r--tools/testing/selftests/net/ynl.mk11
350 files changed, 25090 insertions, 4983 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 28a715a8ef2b..6930fe926c58 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -4,10 +4,8 @@ bind_timewait
bind_wildcard
busy_poller
cmsg_sender
-diag_uid
epoll_busy_poll
fin_ack_lat
-gro
hwtstamp_config
io_uring_zerocopy_tx
ioam6_parser
@@ -16,11 +14,12 @@ ip_local_port_range
ipsec
ipv6_flowlabel
ipv6_flowlabel_mgr
+ipv6_fragmentation
log.txt
-msg_oob
msg_zerocopy
netlink-dumps
nettest
+proc_net_pktgen
psock_fanout
psock_snd
psock_tpacket
@@ -33,23 +32,24 @@ reuseport_bpf_numa
reuseport_dualstack
rxtimestamp
sctp_hello
-scm_pidfd
-scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
sk_so_peek_off
+skf_net_off
socket
so_incoming_cpu
so_netns_cookie
so_txtime
+so_rcv_listener
stress_reuseport_listen
tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
+tcp_port_share
+tfo
timestamping
tls
-toeplitz
tools
tun
txring_overwrite
@@ -57,4 +57,3 @@ txtimestamp
udpgso
udpgso_bench_rx
udpgso_bench_tx
-unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3d487b03c4a0..b66ba04f19d9 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -1,111 +1,200 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../
-TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
- rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh
-TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
-TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
-TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
-TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
-TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
-TEST_PROGS += route_localnet.sh
-TEST_PROGS += reuseaddr_ports_exhausted.sh
-TEST_PROGS += txtimestamp.sh
-TEST_PROGS += vrf-xfrm-tests.sh
-TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += drop_monitor_tests.sh
-TEST_PROGS += vrf_route_leaking.sh
-TEST_PROGS += bareudp.sh
-TEST_PROGS += amt.sh
-TEST_PROGS += unicast_extensions.sh
-TEST_PROGS += udpgro_fwd.sh
-TEST_PROGS += udpgro_frglist.sh
-TEST_PROGS += veth.sh
-TEST_PROGS += ioam6.sh
-TEST_PROGS += gro.sh
-TEST_PROGS += gre_gso.sh
-TEST_PROGS += cmsg_so_mark.sh
-TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
-TEST_PROGS += netns-name.sh
-TEST_PROGS += nl_netdev.py
-TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
-TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
-TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
-TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
-TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
-TEST_PROGS += srv6_end_flavors_test.sh
-TEST_PROGS += srv6_end_dx4_netfilter_test.sh
-TEST_PROGS += srv6_end_dx6_netfilter_test.sh
-TEST_PROGS += vrf_strict_mode_test.sh
-TEST_PROGS += arp_ndisc_evict_nocarrier.sh
-TEST_PROGS += ndisc_unsolicited_na_test.sh
-TEST_PROGS += arp_ndisc_untracked_subnets.sh
-TEST_PROGS += stress_reuseport_listen.sh
-TEST_PROGS += l2_tos_ttl_inherit.sh
-TEST_PROGS += bind_bhash.sh
-TEST_PROGS += ip_local_port_range.sh
-TEST_PROGS += rps_default_mask.sh
-TEST_PROGS += big_tcp.sh
-TEST_PROGS += netns-sysctl.sh
-TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh xfrm_policy_add_speed.sh
-TEST_GEN_FILES = socket nettest
-TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
-TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
-TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
-TEST_GEN_FILES += tcp_fastopen_backup_key
-TEST_GEN_FILES += fin_ack_lat
-TEST_GEN_FILES += reuseaddr_ports_exhausted
-TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
-TEST_GEN_FILES += ipsec
-TEST_GEN_FILES += ioam6_parser
-TEST_GEN_FILES += gro
-TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
-TEST_GEN_FILES += toeplitz
-TEST_GEN_FILES += cmsg_sender
-TEST_GEN_FILES += stress_reuseport_listen
-TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += io_uring_zerocopy_tx
-TEST_PROGS += io_uring_zerocopy_tx.sh
-TEST_GEN_FILES += bind_bhash
-TEST_GEN_PROGS += netlink-dumps
-TEST_GEN_PROGS += sk_bind_sendto_listen
-TEST_GEN_PROGS += sk_connect_zero_addr
-TEST_GEN_PROGS += sk_so_peek_off
-TEST_PROGS += test_ingress_egress_chaining.sh
-TEST_GEN_PROGS += so_incoming_cpu
-TEST_PROGS += sctp_vrf.sh
-TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += ip_local_port_range
-TEST_GEN_PROGS += bind_wildcard
-TEST_GEN_PROGS += bind_timewait
-TEST_PROGS += test_vxlan_mdb.sh
-TEST_PROGS += test_bridge_neigh_suppress.sh
-TEST_PROGS += test_vxlan_nolocalbypass.sh
-TEST_PROGS += test_bridge_backup_port.sh
-TEST_PROGS += fdb_flush.sh fdb_notify.sh
-TEST_PROGS += fq_band_pktlimit.sh
-TEST_PROGS += vlan_hw_filter.sh
-TEST_PROGS += bpf_offload.py
-TEST_PROGS += ipv6_route_update_soft_lockup.sh
-TEST_PROGS += busy_poll_test.sh
+TEST_PROGS := \
+ altnames.sh \
+ amt.sh \
+ arp_ndisc_evict_nocarrier.sh \
+ arp_ndisc_untracked_subnets.sh \
+ bareudp.sh \
+ big_tcp.sh \
+ bind_bhash.sh \
+ bpf_offload.py \
+ broadcast_ether_dst.sh \
+ broadcast_pmtu.sh \
+ busy_poll_test.sh \
+ cmsg_ip.sh \
+ cmsg_so_mark.sh \
+ cmsg_so_priority.sh \
+ cmsg_time.sh \
+ drop_monitor_tests.sh \
+ fcnal-ipv4.sh \
+ fcnal-ipv6.sh \
+ fcnal-other.sh \
+ fdb_flush.sh \
+ fdb_notify.sh \
+ fib-onlink-tests.sh \
+ fib_nexthop_multiprefix.sh \
+ fib_nexthop_nongw.sh \
+ fib_nexthops.sh \
+ fib_rule_tests.sh \
+ fib_tests.sh \
+ fin_ack_lat.sh \
+ fq_band_pktlimit.sh \
+ gre_gso.sh \
+ gre_ipv6_lladdr.sh \
+ icmp.sh \
+ icmp_redirect.sh \
+ io_uring_zerocopy_tx.sh \
+ ioam6.sh \
+ ip6_gre_headroom.sh \
+ ip_defrag.sh \
+ ip_local_port_range.sh \
+ ipv6_flowlabel.sh \
+ ipv6_force_forwarding.sh \
+ ipv6_route_update_soft_lockup.sh \
+ l2_tos_ttl_inherit.sh \
+ l2tp.sh \
+ link_netns.py \
+ lwt_dst_cache_ref_loop.sh \
+ msg_zerocopy.sh \
+ nat6to4.sh \
+ ndisc_unsolicited_na_test.sh \
+ netdev-l2addr.sh \
+ netdevice.sh \
+ netns-name.sh \
+ netns-sysctl.sh \
+ nl_netdev.py \
+ pmtu.sh \
+ psock_snd.sh \
+ reuseaddr_ports_exhausted.sh \
+ reuseport_addr_any.sh \
+ route_hint.sh \
+ route_localnet.sh \
+ rps_default_mask.sh \
+ rtnetlink.py \
+ rtnetlink.sh \
+ rtnetlink_notification.sh \
+ run_afpackettests \
+ run_netsocktests \
+ rxtimestamp.sh \
+ sctp_vrf.sh \
+ skf_net_off.sh \
+ so_txtime.sh \
+ srv6_end_dt46_l3vpn_test.sh \
+ srv6_end_dt4_l3vpn_test.sh \
+ srv6_end_dt6_l3vpn_test.sh \
+ srv6_end_dx4_netfilter_test.sh \
+ srv6_end_dx6_netfilter_test.sh \
+ srv6_end_flavors_test.sh \
+ srv6_end_next_csid_l3vpn_test.sh \
+ srv6_end_x_next_csid_l3vpn_test.sh \
+ srv6_hencap_red_l3vpn_test.sh \
+ srv6_hl2encap_red_l2vpn_test.sh \
+ stress_reuseport_listen.sh \
+ tcp_fastopen_backup_key.sh \
+ test_bpf.sh \
+ test_bridge_backup_port.sh \
+ test_bridge_neigh_suppress.sh \
+ test_ingress_egress_chaining.sh \
+ test_neigh.sh \
+ test_so_rcv.sh \
+ test_vxlan_fdb_changelink.sh \
+ test_vxlan_mdb.sh \
+ test_vxlan_nh.sh \
+ test_vxlan_nolocalbypass.sh \
+ test_vxlan_under_vrf.sh \
+ test_vxlan_vnifiltering.sh \
+ tfo_passive.sh \
+ traceroute.sh \
+ txtimestamp.sh \
+ udpgro.sh \
+ udpgro_bench.sh \
+ udpgro_frglist.sh \
+ udpgro_fwd.sh \
+ udpgso.sh \
+ udpgso_bench.sh \
+ unicast_extensions.sh \
+ veth.sh \
+ vlan_bridge_binding.sh \
+ vlan_hw_filter.sh \
+ vrf-xfrm-tests.sh \
+ vrf_route_leaking.sh \
+ vrf_strict_mode_test.sh \
+ xfrm_policy.sh \
+# end of TEST_PROGS
+
+TEST_PROGS_EXTENDED := \
+ xfrm_policy_add_speed.sh \
+# end of TEST_PROGS_EXTENDED
+
+TEST_GEN_FILES := \
+ bind_bhash \
+ cmsg_sender \
+ fin_ack_lat \
+ hwtstamp_config \
+ io_uring_zerocopy_tx \
+ ioam6_parser \
+ ip_defrag \
+ ip_local_port_range \
+ ipsec \
+ ipv6_flowlabel \
+ ipv6_flowlabel_mgr \
+ msg_zerocopy \
+ nettest \
+ psock_fanout \
+ psock_snd \
+ psock_tpacket \
+ reuseaddr_ports_exhausted \
+ reuseport_addr_any \
+ rxtimestamp \
+ sctp_hello \
+ skf_net_off \
+ so_netns_cookie \
+ so_rcv_listener \
+ so_txtime \
+ socket \
+ stress_reuseport_listen \
+ tcp_fastopen_backup_key \
+ tcp_inq \
+ tcp_mmap \
+ tfo \
+ timestamping \
+ txring_overwrite \
+ txtimestamp \
+ udpgso \
+ udpgso_bench_rx \
+ udpgso_bench_tx \
+# end of TEST_GEN_FILES
+
+TEST_GEN_PROGS := \
+ bind_timewait \
+ bind_wildcard \
+ epoll_busy_poll \
+ ipv6_fragmentation \
+ proc_net_pktgen \
+ reuseaddr_conflict \
+ reuseport_bpf \
+ reuseport_bpf_cpu \
+ reuseport_bpf_numa \
+ reuseport_dualstack \
+ sk_bind_sendto_listen \
+ sk_connect_zero_addr \
+ sk_so_peek_off \
+ so_incoming_cpu \
+ tap \
+ tcp_port_share \
+ tls \
+ tun \
+# end of TEST_GEN_PROGS
+
+TEST_FILES := \
+ fcnal-test.sh \
+ in_netns.sh \
+ lib.sh \
+ settings \
+# end of TEST_FILES
# YNL files, must be before "include ..lib.mk"
YNL_GEN_FILES := busy_poller
+YNL_GEN_PROGS := netlink-dumps
TEST_GEN_FILES += $(YNL_GEN_FILES)
-
-TEST_FILES := settings
-TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_GEN_PROGS += $(YNL_GEN_PROGS)
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore
new file mode 100644
index 000000000000..240b26740c9e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/.gitignore
@@ -0,0 +1,8 @@
+diag_uid
+msg_oob
+scm_inq
+scm_pidfd
+scm_rights
+so_peek_off
+unix_connect
+unix_connreset
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 50584479540b..3cd677b72072 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,14 @@
-CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
+CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end
+
+TEST_GEN_PROGS := \
+ diag_uid \
+ msg_oob \
+ scm_inq \
+ scm_pidfd \
+ scm_rights \
+ so_peek_off \
+ unix_connect \
+ unix_connreset \
+# end of TEST_GEN_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
index 37368567768c..b5429c15a53c 100644
--- a/tools/testing/selftests/net/af_unix/config
+++ b/tools/testing/selftests/net/af_unix/config
@@ -1,3 +1,3 @@
-CONFIG_UNIX=y
CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
index 79a3dd75590e..da7d50cedee6 100644
--- a/tools/testing/selftests/net/af_unix/diag_uid.c
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -14,7 +14,7 @@
#include <sys/types.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(diag_uid)
{
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
index 3ed3882a93b8..1b499d56656c 100644
--- a/tools/testing/selftests/net/af_unix/msg_oob.c
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -11,7 +11,7 @@
#include <sys/signalfd.h>
#include <sys/socket.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define BUF_SZ 32
@@ -210,7 +210,7 @@ static void __sendpair(struct __test_metadata *_metadata,
static void __recvpair(struct __test_metadata *_metadata,
FIXTURE_DATA(msg_oob) *self,
const char *expected_buf, int expected_len,
- int buf_len, int flags)
+ int buf_len, int flags, bool is_sender)
{
int i, ret[2], recv_errno[2], expected_errno = 0;
char recv_buf[2][BUF_SZ] = {};
@@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata,
errno = 0;
for (i = 0; i < 2; i++) {
- ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ int index = is_sender ? i * 2 : i * 2 + 1;
+
+ ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags);
recv_errno[i] = errno;
}
@@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
ASSERT_EQ(answ[0], answ[1]);
}
+static void __resetpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const FIXTURE_VARIANT(msg_oob) *variant,
+ bool reset)
+{
+ int i;
+
+ for (i = 0; i < 2; i++)
+ close(self->fd[i * 2 + 1]);
+
+ __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1,
+ variant->peek ? MSG_PEEK : 0, true);
+}
+
#define sendpair(buf, len, flags) \
__sendpair(_metadata, self, buf, len, flags)
@@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
if (variant->peek) \
__recvpair(_metadata, self, \
expected_buf, expected_len, \
- buf_len, (flags) | MSG_PEEK); \
+ buf_len, (flags) | MSG_PEEK, false); \
__recvpair(_metadata, self, \
- expected_buf, expected_len, buf_len, flags); \
+ expected_buf, expected_len, \
+ buf_len, flags, false); \
} while (0)
#define epollpair(oob_remaining) \
@@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata,
#define setinlinepair() \
__setinlinepair(_metadata, self)
+#define resetpair(reset) \
+ __resetpair(_metadata, self, variant, reset)
+
#define tcp_incompliant \
for (self->tcp_compliant = false; \
self->tcp_compliant == false; \
@@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(true);
+}
+
+TEST_F(msg_oob, non_oob_no_reset)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob)
@@ -355,6 +390,19 @@ TEST_F(msg_oob, oob)
recvpair("x", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, oob_reset)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ resetpair(true);
}
TEST_F(msg_oob, oob_drop)
@@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead)
@@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead)
recvpair("hell", 4, 4, 0);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, oob_break)
@@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break)
recvpair("", -EAGAIN, 1, 0);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_ahead_break)
@@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break)
recvpair("world", 5, 5, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, oob_break_drop)
@@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_break)
@@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break)
recvpair("ld", 2, 2, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop)
@@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_drop_2)
@@ -523,6 +587,8 @@ TEST_F(msg_oob, ex_oob_drop_2)
epollpair(false);
siocatmarkpair(true);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, ex_oob_oob)
@@ -546,6 +612,54 @@ TEST_F(msg_oob, ex_oob_oob)
recvpair("", -EINVAL, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ex_oob_oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ sendpair("z", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
}
TEST_F(msg_oob, ex_oob_ahead_break)
@@ -576,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break)
recvpair("d", 1, 1, MSG_OOB);
epollpair(false);
siocatmarkpair(true);
+
+ tcp_incompliant {
+ resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */
+ }
}
TEST_F(msg_oob, ex_oob_siocatmark)
@@ -595,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_F(msg_oob, inline_oob)
@@ -612,6 +732,8 @@ TEST_F(msg_oob, inline_oob)
recvpair("x", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_break)
@@ -633,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break)
recvpair("o", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_oob_ahead_break)
@@ -661,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break)
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_break)
@@ -686,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break)
recvpair("rld", 3, 3, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_no_drop)
@@ -707,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop)
recvpair("y", 1, 1, 0);
epollpair(false);
siocatmarkpair(false);
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_drop)
@@ -731,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop)
epollpair(false);
siocatmarkpair(false);
}
+
+ resetpair(false);
}
TEST_F(msg_oob, inline_ex_oob_siocatmark)
@@ -752,6 +884,8 @@ TEST_F(msg_oob, inline_ex_oob_siocatmark)
recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
epollpair(true);
siocatmarkpair(false);
+
+ resetpair(true);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c
new file mode 100644
index 000000000000..3a86be9bda17
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_inq.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "kselftest_harness.h"
+
+#define NR_CHUNKS 100
+#define MSG_LEN 256
+
+FIXTURE(scm_inq)
+{
+ int fd[2];
+};
+
+FIXTURE_VARIANT(scm_inq)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(scm_inq, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(scm_inq)
+{
+ int err;
+
+ err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(scm_inq)
+{
+ close(self->fd[0]);
+ close(self->fd[1]);
+}
+
+static void send_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char buf[MSG_LEN] = {};
+ int i, ret;
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ ret = send(self->fd[0], buf, sizeof(buf), 0);
+ ASSERT_EQ(sizeof(buf), ret);
+ }
+}
+
+static void recv_chunks(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_inq) *self)
+{
+ char cmsg_buf[CMSG_SPACE(sizeof(int))];
+ struct msghdr msg = {};
+ struct iovec iov = {};
+ struct cmsghdr *cmsg;
+ char buf[MSG_LEN];
+ int i, ret;
+ int inq;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ for (i = 0; i < NR_CHUNKS; i++) {
+ memset(buf, 0, sizeof(buf));
+ memset(cmsg_buf, 0, sizeof(cmsg_buf));
+
+ ret = recvmsg(self->fd[1], &msg, 0);
+ ASSERT_EQ(MSG_LEN, ret);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ ASSERT_NE(NULL, cmsg);
+ ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len);
+ ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level);
+ ASSERT_EQ(SCM_INQ, cmsg->cmsg_type);
+
+ ret = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, ret);
+ ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq);
+ }
+}
+
+TEST_F(scm_inq, basic)
+{
+ int err, inq;
+
+ err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int));
+ if (variant->type != SOCK_STREAM) {
+ ASSERT_EQ(-ENOPROTOOPT, -errno);
+ return;
+ }
+
+ ASSERT_EQ(0, err);
+
+ err = ioctl(self->fd[1], SIOCINQ, &inq);
+ ASSERT_EQ(0, err);
+ ASSERT_EQ(0, inq);
+
+ send_chunks(_metadata, self);
+ recv_chunks(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
index 7e534594167e..2c18b92a2603 100644
--- a/tools/testing/selftests/net/af_unix/scm_pidfd.c
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -15,7 +15,8 @@
#include <sys/types.h>
#include <sys/wait.h>
-#include "../../kselftest_harness.h"
+#include "../../pidfd/pidfd.h"
+#include "kselftest_harness.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
#define log_err(MSG, ...) \
@@ -26,6 +27,8 @@
#define SCM_PIDFD 0x04
#endif
+#define CHILD_EXIT_CODE_OK 123
+
static void child_die()
{
exit(1);
@@ -126,16 +129,64 @@ out:
return result;
}
+struct cmsg_data {
+ struct ucred *ucred;
+ int *pidfd;
+};
+
+static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res)
+{
+ struct cmsghdr *cmsg;
+
+ if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*res->pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ res->pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*res->ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ res->ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!res->pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!res->ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ return 0;
+}
+
static int cmsg_check(int fd)
{
struct msghdr msg = { 0 };
- struct cmsghdr *cmsg;
+ struct cmsg_data res;
struct iovec iov;
- struct ucred *ucred = NULL;
int data = 0;
char control[CMSG_SPACE(sizeof(struct ucred)) +
CMSG_SPACE(sizeof(int))] = { 0 };
- int *pidfd = NULL;
pid_t parent_pid;
int err;
@@ -158,53 +209,98 @@ static int cmsg_check(int fd)
return 1;
}
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
- cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_PIDFD) {
- if (cmsg->cmsg_len < sizeof(*pidfd)) {
- log_err("CMSG parse: SCM_PIDFD wrong len");
- return 1;
- }
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
- pidfd = (void *)CMSG_DATA(cmsg);
- }
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
+ return 1;
+ }
- if (cmsg->cmsg_level == SOL_SOCKET &&
- cmsg->cmsg_type == SCM_CREDENTIALS) {
- if (cmsg->cmsg_len < sizeof(*ucred)) {
- log_err("CMSG parse: SCM_CREDENTIALS wrong len");
- return 1;
- }
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ close(*res.pidfd);
+ return 1;
+ }
- ucred = (void *)CMSG_DATA(cmsg);
- }
+ close(*res.pidfd);
+ return 0;
+}
+
+static int cmsg_check_dead(int fd, int expected_pid)
+{
+ int err;
+ struct msghdr msg = { 0 };
+ struct cmsg_data res;
+ struct iovec iov;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ struct pidfd_info info = {
+ .mask = PIDFD_INFO_EXIT,
+ };
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
}
- /* send(pfd, "x", sizeof(char), 0) */
- if (data != 'x') {
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ /* send(cfd, "y", sizeof(char), 0) */
+ if (data != 'y') {
log_err("recvmsg: data corruption");
return 1;
}
- if (!pidfd) {
- log_err("CMSG parse: SCM_PIDFD not found");
+ if (parse_cmsg(&msg, &res)) {
+ log_err("CMSG parse: parse_cmsg() failed");
return 1;
}
- if (!ucred) {
- log_err("CMSG parse: SCM_CREDENTIALS not found");
+ /*
+ * pidfd from SCM_PIDFD should point to the client_pid.
+ * Let's read exit information and check if it's what
+ * we expect to see.
+ */
+ if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) {
+ log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__);
+ close(*res.pidfd);
return 1;
}
- /* pidfd from SCM_PIDFD should point to the parent process PID */
- parent_pid =
- get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
- if (parent_pid != getppid()) {
- log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ if (!(info.mask & PIDFD_INFO_EXIT)) {
+ log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__);
+ close(*res.pidfd);
+ return 1;
+ }
+
+ err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1;
+ if (err != CHILD_EXIT_CODE_OK) {
+ log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK);
+ close(*res.pidfd);
return 1;
}
+ close(*res.pidfd);
return 0;
}
@@ -291,6 +387,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract)
memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
}
+static int sk_enable_cred_pass(int sk)
+{
+ int on = 0;
+
+ on = 1;
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ return 1;
+ }
+
+ if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ return 1;
+ }
+
+ return 0;
+}
+
static void client(FIXTURE_DATA(scm_pidfd) *self,
const FIXTURE_VARIANT(scm_pidfd) *variant)
{
@@ -299,7 +413,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
struct ucred peer_cred;
int peer_pidfd;
pid_t peer_pid;
- int on = 0;
cfd = socket(AF_UNIX, variant->type, 0);
if (cfd < 0) {
@@ -322,14 +435,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
- on = 1;
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSCRED");
- child_die();
- }
-
- if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
- log_err("Failed to set SO_PASSPIDFD");
+ if (sk_enable_cred_pass(cfd)) {
+ log_err("sk_enable_cred_pass() failed");
child_die();
}
@@ -340,6 +447,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self,
child_die();
}
+ /* send something to the parent so it can receive SCM_PIDFD too and validate it */
+ if (send(cfd, "y", sizeof(char), 0) == -1) {
+ log_err("Failed to send(cfd, \"y\", sizeof(char), 0)");
+ child_die();
+ }
+
/* skip further for SOCK_DGRAM as it's not applicable */
if (variant->type == SOCK_DGRAM)
return;
@@ -398,7 +511,13 @@ TEST_F(scm_pidfd, test)
close(self->server);
close(self->startup_pipe[0]);
client(self, variant);
- exit(0);
+
+ /*
+ * It's a bit unusual, but in case of success we return non-zero
+ * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it
+ * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead().
+ */
+ exit(CHILD_EXIT_CODE_OK);
}
close(self->startup_pipe[1]);
@@ -421,9 +540,17 @@ TEST_F(scm_pidfd, test)
ASSERT_NE(-1, err);
}
- close(pfd);
waitpid(self->client_pid, &child_status, 0);
- ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+ /* see comment before exit(CHILD_EXIT_CODE_OK) */
+ ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+
+ err = sk_enable_cred_pass(pfd);
+ ASSERT_EQ(0, err);
+
+ err = cmsg_check_dead(pfd, self->client_pid);
+ ASSERT_EQ(0, err);
+
+ close(pfd);
}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
index d66336256580..d82a79c21c17 100644
--- a/tools/testing/selftests/net/af_unix/scm_rights.c
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -10,7 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(scm_rights)
{
@@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights)
int type;
int flags;
bool test_listener;
+ bool disabled;
};
FIXTURE_VARIANT_ADD(scm_rights, dgram)
@@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram)
.type = SOCK_DGRAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream)
@@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
@@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = false,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
@@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
.type = SOCK_STREAM,
.flags = 0,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+ .disabled = true,
};
FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
@@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
.type = SOCK_STREAM,
.flags = MSG_OOB,
.test_listener = true,
+ .disabled = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+ .disabled = true,
};
static int count_sockets(struct __test_metadata *_metadata,
@@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights)
ret = unshare(CLONE_NEWNET);
ASSERT_EQ(0, ret);
+ if (variant->disabled)
+ return;
+
ret = count_sockets(_metadata, variant);
ASSERT_EQ(0, ret);
}
@@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights)
{
int ret;
+ if (variant->disabled)
+ return;
+
sleep(1);
ret = count_sockets(_metadata, variant);
@@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights)
static void create_listeners(struct __test_metadata *_metadata,
FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
int n)
{
struct sockaddr_un addr = {
@@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata,
ret = listen(self->fd[i], -1);
ASSERT_EQ(0, ret);
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
+
addrlen = sizeof(addr);
ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
ASSERT_EQ(0, ret);
@@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata,
for (i = 0; i < n * 2; i += 2) {
ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
ASSERT_EQ(0, ret);
+
+ if (variant->disabled) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+ }
}
}
@@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata *_metadata,
ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
if (variant->test_listener)
- create_listeners(_metadata, self, n);
+ create_listeners(_metadata, self, variant, n);
else
create_socketpairs(_metadata, self, variant, n);
}
@@ -201,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata,
{
#define MSG "x"
#define MSGLEN 1
- struct {
- struct cmsghdr cmsghdr;
- int fd[2];
- } cmsg = {
- .cmsghdr = {
- .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
- .cmsg_level = SOL_SOCKET,
- .cmsg_type = SCM_RIGHTS,
- },
- .fd = {
- self->fd[inflight * 2],
- self->fd[inflight * 2],
- },
+ int fds[2] = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
};
+ char cmsg_buf[CMSG_SPACE(sizeof(fds))];
struct iovec iov = {
.iov_base = MSG,
.iov_len = MSGLEN,
@@ -224,13 +285,26 @@ void __send_fd(struct __test_metadata *_metadata,
.msg_namelen = 0,
.msg_iov = &iov,
.msg_iovlen = 1,
- .msg_control = &cmsg,
- .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ .msg_control = cmsg_buf,
+ .msg_controllen = sizeof(cmsg_buf),
};
+ struct cmsghdr *cmsg;
int ret;
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
- ASSERT_EQ(MSGLEN, ret);
+
+ if (variant->disabled) {
+ ASSERT_EQ(-1, ret);
+ ASSERT_EQ(-EPERM, -errno);
+ } else {
+ ASSERT_EQ(MSGLEN, ret);
+ }
}
#define create_sockets(n) \
diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c
new file mode 100644
index 000000000000..86e7b0fb522d
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/so_peek_off.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(so_peek_off)
+{
+ int fd[2]; /* 0: sender, 1: receiver */
+};
+
+FIXTURE_VARIANT(so_peek_off)
+{
+ int type;
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, stream)
+{
+ .type = SOCK_STREAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, dgram)
+{
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_VARIANT_ADD(so_peek_off, seqpacket)
+{
+ .type = SOCK_SEQPACKET,
+};
+
+FIXTURE_SETUP(so_peek_off)
+{
+ struct timeval timeout = {
+ .tv_sec = 5,
+ .tv_usec = 0,
+ };
+ int ret;
+
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd);
+ ASSERT_EQ(0, ret);
+
+ ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW,
+ &timeout, sizeof(timeout));
+ ASSERT_EQ(0, ret);
+
+ ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF,
+ &(int){0}, sizeof(int));
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(so_peek_off)
+{
+ close_range(self->fd[0], self->fd[1], 0);
+}
+
+#define sendeq(fd, str, flags) \
+ do { \
+ int bytes, len = strlen(str); \
+ \
+ bytes = send(fd, str, len, flags); \
+ ASSERT_EQ(len, bytes); \
+ } while (0)
+
+#define recveq(fd, str, buflen, flags) \
+ do { \
+ char buf[(buflen) + 1] = {}; \
+ int bytes; \
+ \
+ bytes = recv(fd, buf, buflen, flags); \
+ ASSERT_NE(-1, bytes); \
+ ASSERT_STREQ(str, buf); \
+ } while (0)
+
+#define async \
+ for (pid_t pid = (pid = fork(), \
+ pid < 0 ? \
+ __TH_LOG("Failed to start async {}"), \
+ _metadata->exit_code = KSFT_FAIL, \
+ __bail(1, _metadata), \
+ 0xdead : \
+ pid); \
+ !pid; exit(0))
+
+TEST_F(so_peek_off, single_chunk)
+{
+ sendeq(self->fd[0], "aaaabbbb", 0);
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks)
+{
+ sendeq(self->fd[0], "aaaa", 0);
+ sendeq(self->fd[0], "bbbb", 0);
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_blocking)
+{
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "aaaa", 0);
+ }
+
+ recveq(self->fd[1], "aaaa", 4, MSG_PEEK);
+
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "bbbb", 0);
+ }
+
+ /* goto again; -> goto redo; in unix_stream_read_generic(). */
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_F(so_peek_off, two_chunks_overlap)
+{
+ sendeq(self->fd[0], "aaaa", 0);
+ recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+ sendeq(self->fd[0], "bbbb", 0);
+
+ if (variant->type == SOCK_STREAM) {
+ /* SOCK_STREAM tries to fill the buffer. */
+ recveq(self->fd[1], "aabb", 4, MSG_PEEK);
+ recveq(self->fd[1], "bb", 100, MSG_PEEK);
+ } else {
+ /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */
+ recveq(self->fd[1], "aa", 100, MSG_PEEK);
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+ }
+}
+
+TEST_F(so_peek_off, two_chunks_overlap_blocking)
+{
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "aaaa", 0);
+ }
+
+ recveq(self->fd[1], "aa", 2, MSG_PEEK);
+
+ async {
+ usleep(1000);
+ sendeq(self->fd[0], "bbbb", 0);
+ }
+
+ /* Even SOCK_STREAM does not wait if at least one byte is read. */
+ recveq(self->fd[1], "aa", 100, MSG_PEEK);
+
+ recveq(self->fd[1], "bbbb", 100, MSG_PEEK);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
index d799fd8f5c7c..870ca96fa8ea 100644
--- a/tools/testing/selftests/net/af_unix/unix_connect.c
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -10,7 +10,7 @@
#include <sys/socket.h>
#include <sys/un.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(unix_connect)
{
diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c
new file mode 100644
index 000000000000..08c1de8f5a98
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connreset.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest for AF_UNIX socket close and ECONNRESET behaviour.
+ *
+ * This test verifies:
+ * 1. SOCK_STREAM returns EOF when the peer closes normally.
+ * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data.
+ * 3. SOCK_SEQPACKET returns EOF when the peer closes normally.
+ * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data.
+ * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes.
+ *
+ * These tests document the intended Linux behaviour.
+ *
+ */
+
+#define _GNU_SOURCE
+#include <string.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include "../../kselftest_harness.h"
+
+#define SOCK_PATH "/tmp/af_unix_connreset.sock"
+
+static void remove_socket_file(void)
+{
+ unlink(SOCK_PATH);
+}
+
+FIXTURE(unix_sock)
+{
+ int server;
+ int client;
+ int child;
+};
+
+FIXTURE_VARIANT(unix_sock)
+{
+ int socket_type;
+ const char *name;
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, stream) {
+ .socket_type = SOCK_STREAM,
+ .name = "SOCK_STREAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, dgram) {
+ .socket_type = SOCK_DGRAM,
+ .name = "SOCK_DGRAM",
+};
+
+FIXTURE_VARIANT_ADD(unix_sock, seqpacket) {
+ .socket_type = SOCK_SEQPACKET,
+ .name = "SOCK_SEQPACKET",
+};
+
+FIXTURE_SETUP(unix_sock)
+{
+ struct sockaddr_un addr = {};
+ int err;
+
+ addr.sun_family = AF_UNIX;
+ strcpy(addr.sun_path, SOCK_PATH);
+ remove_socket_file();
+
+ self->server = socket(AF_UNIX, variant->socket_type, 0);
+ ASSERT_LT(-1, self->server);
+
+ err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(0, err);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ err = listen(self->server, 1);
+ ASSERT_EQ(0, err);
+ }
+
+ self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0);
+ ASSERT_LT(-1, self->client);
+
+ err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(0, err);
+}
+
+FIXTURE_TEARDOWN(unix_sock)
+{
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET)
+ close(self->child);
+
+ close(self->client);
+ close(self->server);
+ remove_socket_file();
+}
+
+/* Test 1: peer closes normally */
+TEST_F(unix_sock, eof)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ self->child = accept(self->server, NULL, NULL);
+ ASSERT_LT(-1, self->child);
+
+ close(self->child);
+ } else {
+ close(self->server);
+ }
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ ASSERT_EQ(0, n);
+ } else {
+ ASSERT_EQ(-1, n);
+ ASSERT_EQ(EAGAIN, errno);
+ }
+}
+
+/* Test 2: peer closes with unread data */
+TEST_F(unix_sock, reset_unread_behavior)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ /* Send data that will remain unread */
+ send(self->client, "hello", 5, 0);
+
+ if (variant->socket_type == SOCK_DGRAM) {
+ /* No real connection, just close the server */
+ close(self->server);
+ } else {
+ self->child = accept(self->server, NULL, NULL);
+ ASSERT_LT(-1, self->child);
+
+ /* Peer closes before client reads */
+ close(self->child);
+ }
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+ ASSERT_EQ(-1, n);
+
+ if (variant->socket_type == SOCK_STREAM ||
+ variant->socket_type == SOCK_SEQPACKET) {
+ ASSERT_EQ(ECONNRESET, errno);
+ } else {
+ ASSERT_EQ(EAGAIN, errno);
+ }
+}
+
+/* Test 3: closing unaccepted (embryo) server socket should reset client. */
+TEST_F(unix_sock, reset_closed_embryo)
+{
+ char buf[16] = {};
+ ssize_t n;
+
+ if (variant->socket_type == SOCK_DGRAM) {
+ snprintf(_metadata->results->reason,
+ sizeof(_metadata->results->reason),
+ "Test only applies to SOCK_STREAM and SOCK_SEQPACKET");
+ exit(KSFT_XFAIL);
+ }
+
+ /* Close server without accept()ing */
+ close(self->server);
+
+ n = recv(self->client, buf, sizeof(buf), 0);
+
+ ASSERT_EQ(-1, n);
+ ASSERT_EQ(ECONNRESET, errno);
+}
+
+TEST_HARNESS_MAIN
+
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index d458b45c775b..3ef209cacb8e 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -194,15 +194,21 @@ test_remote_ip()
send_mcast_torture4()
{
- ip netns exec "${SOURCE}" bash -c \
- 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+ for i in `seq 10`; do
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001'
+ echo -n "."
+ done
}
send_mcast_torture6()
{
- ip netns exec "${SOURCE}" bash -c \
- 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+ for i in `seq 10`; do
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001'
+ echo -n "."
+ done
}
check_features()
@@ -278,10 +284,12 @@ wait $pid || err=$?
if [ $err -eq 1 ]; then
ERR=1
fi
+printf "TEST: %-50s" "IPv4 amt traffic forwarding torture"
send_mcast_torture4
-printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture"
+printf " [ OK ]\n"
+printf "TEST: %-50s" "IPv6 amt traffic forwarding torture"
send_mcast_torture6
-printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture"
+printf " [ OK ]\n"
sleep 5
if [ "${ERR}" -eq 1 ]; then
echo "Some tests failed." >&2
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index 92eb880c52f2..00758f00efbf 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -75,7 +75,7 @@ setup_v4() {
ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
if [ $? -ne 0 ]; then
cleanup_v4
- echo "failed"
+ echo "failed; is the system using MACAddressPolicy=persistent ?"
exit 1
fi
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
index f366cadbc5e8..d9e5b967f815 100755
--- a/tools/testing/selftests/net/bareudp.sh
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Test various bareudp tunnel configurations.
@@ -106,26 +106,16 @@
# | |
# +-----------------------------------------------------------------------+
+. ./lib.sh
+
ERR=4 # Return 4 by default, which is the SKIP code for kselftest
PING6="ping"
PAUSE_ON_FAIL="no"
-readonly NS0=$(mktemp -u ns0-XXXXXXXX)
-readonly NS1=$(mktemp -u ns1-XXXXXXXX)
-readonly NS2=$(mktemp -u ns2-XXXXXXXX)
-readonly NS3=$(mktemp -u ns3-XXXXXXXX)
-
# Exit the script after having removed the network namespaces it created
-#
-# Parameters:
-#
-# * The list of network namespaces to delete before exiting.
-#
exit_cleanup()
{
- for ns in "$@"; do
- ip netns delete "${ns}" 2>/dev/null || true
- done
+ cleanup_all_ns
if [ "${ERR}" -eq 4 ]; then
echo "Error: Setting up the testing environment failed." >&2
@@ -140,17 +130,7 @@ exit_cleanup()
# namespaces created by this script are deleted.
create_namespaces()
{
- ip netns add "${NS0}" || exit_cleanup
- ip netns add "${NS1}" || exit_cleanup "${NS0}"
- ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
- ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
-}
-
-# The trap function handler
-#
-exit_cleanup_all()
-{
- exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+ setup_ns NS0 NS1 NS2 NS3 || exit_cleanup
}
# Configure a network interface using a host route
@@ -188,10 +168,6 @@ iface_config()
#
setup_underlay()
{
- for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
- ip -netns "${ns}" link set dev lo up
- done;
-
ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
@@ -234,14 +210,6 @@ setup_overlay_ipv4()
ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
-
- # The intermediate namespaces don't have routes for the reverse path,
- # as it will be handled by tc. So we need to ensure that rp_filter is
- # not going to block the traffic.
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
- ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
}
setup_overlay_ipv6()
@@ -521,13 +489,10 @@ done
check_features
-# Create namespaces before setting up the exit trap.
-# Otherwise, exit_cleanup_all() could delete namespaces that were not created
-# by this script.
-create_namespaces
-
set -e
-trap exit_cleanup_all EXIT
+trap exit_cleanup EXIT
+
+create_namespaces
setup_underlay
setup_overlay_ipv4
diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile
new file mode 100644
index 000000000000..2546c45e42f7
--- /dev/null
+++ b/tools/testing/selftests/net/bench/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_MODS_DIR := page_pool
+
+TEST_PROGS += test_bench_page_pool.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile
new file mode 100644
index 000000000000..0549a16ba275
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/Makefile
@@ -0,0 +1,17 @@
+BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+obj-m += bench_page_pool.o
+bench_page_pool-y += bench_page_pool_simple.o time_bench.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean
diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
new file mode 100644
index 000000000000..cb6468adbda4
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmark module for page_pool.
+ *
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/interrupt.h>
+#include <linux/limits.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <net/page_pool/helpers.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+#define MY_POOL_SIZE 1024
+
+/* Makes tests selectable. Useful for perf-record to analyze a single test.
+ * Hint: Bash shells support writing binary number like: $((2#101010)
+ *
+ * # modprobe bench_page_pool_simple run_flags=$((2#100))
+ */
+static unsigned long run_flags = 0xFFFFFFFF;
+module_param(run_flags, ulong, 0);
+MODULE_PARM_DESC(run_flags, "Limit which bench test that runs");
+
+/* Count the bit number from the enum */
+enum benchmark_bit {
+ bit_run_bench_baseline,
+ bit_run_bench_no_softirq01,
+ bit_run_bench_no_softirq02,
+ bit_run_bench_no_softirq03,
+};
+
+#define bit(b) (1 << (b))
+#define enabled(b) ((run_flags & (bit(b))))
+
+/* notice time_bench is limited to U32_MAX nr loops */
+static unsigned long loops = 10000000;
+module_param(loops, ulong, 0);
+MODULE_PARM_DESC(loops, "Specify loops bench will run");
+
+/* Timing at the nanosec level, we need to know the overhead
+ * introduced by the for loop itself
+ */
+static int time_bench_for_loop(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ int i;
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+static int time_bench_atomic_inc(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ atomic_t cnt;
+ int i;
+
+ atomic_set(&cnt, 0);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ atomic_inc(&cnt);
+ barrier(); /* avoid compiler to optimize this loop */
+ }
+ loops_cnt = atomic_read(&cnt);
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum
+ * overhead of taking+releasing a spinlock, to know the cycles that can be saved
+ * by e.g. amortizing this via bulking.
+ */
+static int time_bench_lock(struct time_bench_record *rec, void *data)
+{
+ uint64_t loops_cnt = 0;
+ spinlock_t lock;
+ int i;
+
+ spin_lock_init(&lock);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ spin_lock(&lock);
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+ spin_unlock(&lock);
+ }
+ time_bench_stop(rec, loops_cnt);
+ return loops_cnt;
+}
+
+/* Helper for filling some page's into ptr_ring */
+static void pp_fill_ptr_ring(struct page_pool *pp, int elems)
+{
+ /* GFP_ATOMIC needed when under run softirq */
+ gfp_t gfp_mask = GFP_ATOMIC;
+ struct page **array;
+ int i;
+
+ array = kcalloc(elems, sizeof(struct page *), gfp_mask);
+
+ for (i = 0; i < elems; i++)
+ array[i] = page_pool_alloc_pages(pp, gfp_mask);
+ for (i = 0; i < elems; i++)
+ page_pool_put_page(pp, array[i], -1, false);
+
+ kfree(array);
+}
+
+enum test_type { type_fast_path, type_ptr_ring, type_page_allocator };
+
+/* Depends on compile optimizing this function */
+static int time_bench_page_pool(struct time_bench_record *rec, void *data,
+ enum test_type type, const char *func)
+{
+ uint64_t loops_cnt = 0;
+ gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */
+ int i, err;
+
+ struct page_pool *pp;
+ struct page *page;
+
+ struct page_pool_params pp_params = {
+ .order = 0,
+ .flags = 0,
+ .pool_size = MY_POOL_SIZE,
+ .nid = NUMA_NO_NODE,
+ .dev = NULL, /* Only use for DMA mapping */
+ .dma_dir = DMA_BIDIRECTIONAL,
+ };
+
+ pp = page_pool_create(&pp_params);
+ if (IS_ERR(pp)) {
+ err = PTR_ERR(pp);
+ pr_warn("%s: Error(%d) creating page_pool\n", func, err);
+ goto out;
+ }
+ pp_fill_ptr_ring(pp, 64);
+
+ if (in_serving_softirq())
+ pr_warn("%s(): in_serving_softirq fast-path\n", func);
+ else
+ pr_warn("%s(): Cannot use page_pool fast-path\n", func);
+
+ time_bench_start(rec);
+ /** Loop to measure **/
+ for (i = 0; i < rec->loops; i++) {
+ /* Common fast-path alloc that depend on in_serving_softirq() */
+ page = page_pool_alloc_pages(pp, gfp_mask);
+ if (!page)
+ break;
+ loops_cnt++;
+ barrier(); /* avoid compiler to optimize this loop */
+
+ /* The benchmarks purpose it to test different return paths.
+ * Compiler should inline optimize other function calls out
+ */
+ if (type == type_fast_path) {
+ /* Fast-path recycling e.g. XDP_DROP use-case */
+ page_pool_recycle_direct(pp, page);
+
+ } else if (type == type_ptr_ring) {
+ /* Normal return path */
+ page_pool_put_page(pp, page, -1, false);
+
+ } else if (type == type_page_allocator) {
+ /* Test if not pages are recycled, but instead
+ * returned back into systems page allocator
+ */
+ get_page(page); /* cause no-recycling */
+ page_pool_put_page(pp, page, -1, false);
+ put_page(page);
+ } else {
+ BUILD_BUG();
+ }
+ }
+ time_bench_stop(rec, loops_cnt);
+out:
+ page_pool_destroy(pp);
+ return loops_cnt;
+}
+
+static int time_bench_page_pool01_fast_path(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_fast_path, __func__);
+}
+
+static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_ptr_ring, __func__);
+}
+
+static int time_bench_page_pool03_slow(struct time_bench_record *rec,
+ void *data)
+{
+ return time_bench_page_pool(rec, data, type_page_allocator, __func__);
+}
+
+static int run_benchmark_tests(void)
+{
+ uint32_t nr_loops = loops;
+
+ /* Baseline tests */
+ if (enabled(bit_run_bench_baseline)) {
+ time_bench_loop(nr_loops * 10, 0, "for_loop", NULL,
+ time_bench_for_loop);
+ time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL,
+ time_bench_atomic_inc);
+ time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock);
+ }
+
+ /* This test cannot activate correct code path, due to no-softirq ctx */
+ if (enabled(bit_run_bench_no_softirq01))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL,
+ time_bench_page_pool01_fast_path);
+ if (enabled(bit_run_bench_no_softirq02))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL,
+ time_bench_page_pool02_ptr_ring);
+ if (enabled(bit_run_bench_no_softirq03))
+ time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL,
+ time_bench_page_pool03_slow);
+
+ return 0;
+}
+
+static int __init bench_page_pool_simple_module_init(void)
+{
+ if (verbose)
+ pr_info("Loaded\n");
+
+ if (loops > U32_MAX) {
+ pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops,
+ U32_MAX);
+ return -ECHRNG;
+ }
+
+ run_benchmark_tests();
+
+ return 0;
+}
+module_init(bench_page_pool_simple_module_init);
+
+static void __exit bench_page_pool_simple_module_exit(void)
+{
+ if (verbose)
+ pr_info("Unloaded\n");
+}
+module_exit(bench_page_pool_simple_module_exit);
+
+MODULE_DESCRIPTION("Benchmark of page_pool simple cases");
+MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>");
+MODULE_LICENSE("GPL");
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c
new file mode 100644
index 000000000000..073bb36ec5f2
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/time.h>
+
+#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */
+
+/* For concurrency testing */
+#include <linux/completion.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/kthread.h>
+
+#include "time_bench.h"
+
+static int verbose = 1;
+
+/** TSC (Time-Stamp Counter) based **
+ * See: linux/time_bench.h
+ * tsc_start_clock() and tsc_stop_clock()
+ */
+
+/** Wall-clock based **
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ */
+#define PERF_FORMAT \
+ (PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
+ PERF_FORMAT_TOTAL_TIME_RUNNING)
+
+struct raw_perf_event {
+ uint64_t config; /* event */
+ uint64_t config1; /* umask */
+ struct perf_event *save;
+ char *desc;
+};
+
+/* if HT is enable a maximum of 4 events (5 if one is instructions
+ * retired can be specified, if HT is disabled a maximum of 8 (9 if
+ * one is instructions retired) can be specified.
+ *
+ * From Table 19-1. Architectural Performance Events
+ * Architectures Software Developer’s Manual Volume 3: System Programming
+ * Guide
+ */
+struct raw_perf_event perf_events[] = {
+ { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" },
+ { 0xc0, 0x00, NULL, "Instruction Retired" }
+};
+
+#define NUM_EVTS (ARRAY_SIZE(perf_events))
+
+/* WARNING: PMU config is currently broken!
+ */
+bool time_bench_PMU_config(bool enable)
+{
+ int i;
+ struct perf_event_attr perf_conf;
+ struct perf_event *perf_event;
+ int cpu;
+
+ preempt_disable();
+ cpu = smp_processor_id();
+ pr_info("DEBUG: cpu:%d\n", cpu);
+ preempt_enable();
+
+ memset(&perf_conf, 0, sizeof(struct perf_event_attr));
+ perf_conf.type = PERF_TYPE_RAW;
+ perf_conf.size = sizeof(struct perf_event_attr);
+ perf_conf.read_format = PERF_FORMAT;
+ perf_conf.pinned = 1;
+ perf_conf.exclude_user = 1; /* No userspace events */
+ perf_conf.exclude_kernel = 0; /* Only kernel events */
+
+ for (i = 0; i < NUM_EVTS; i++) {
+ perf_conf.disabled = enable;
+ //perf_conf.disabled = (i == 0) ? 1 : 0;
+ perf_conf.config = perf_events[i].config;
+ perf_conf.config1 = perf_events[i].config1;
+ if (verbose)
+ pr_info("%s() enable PMU counter: %s\n",
+ __func__, perf_events[i].desc);
+ perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
+ NULL /* task */,
+ NULL /* overflow_handler*/,
+ NULL /* context */);
+ if (perf_event) {
+ perf_events[i].save = perf_event;
+ pr_info("%s():DEBUG perf_event success\n", __func__);
+
+ perf_event_enable(perf_event);
+ } else {
+ pr_info("%s():DEBUG perf_event is NULL\n", __func__);
+ }
+ }
+
+ return true;
+}
+
+/** Generic functions **
+ */
+
+/* Calculate stats, store results in record */
+bool time_bench_calc_stats(struct time_bench_record *rec)
+{
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+ uint64_t ns_per_call_tmp_rem = 0;
+ uint32_t ns_per_call_remainder = 0;
+ uint64_t pmc_ipc_tmp_rem = 0;
+ uint32_t pmc_ipc_remainder = 0;
+ uint32_t pmc_ipc_div = 0;
+ uint32_t invoked_cnt_precision = 0;
+ uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ if (rec->invoked_cnt < 1000) {
+ pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
+ /* div_u64_rem() can only support div with 32bit*/
+ pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
+ rec->invoked_cnt);
+ return false;
+ }
+ invoked_cnt = (uint32_t)rec->invoked_cnt;
+ }
+
+ /* TSC (Time-Stamp Counter) records */
+ if (rec->flags & TIME_BENCH_TSC) {
+ rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
+ if (rec->tsc_interval == 0) {
+ pr_err("ABORT: timing took ZERO TSC time\n");
+ return false;
+ }
+ /* Calculate stats */
+ if (rec->flags & TIME_BENCH_LOOP)
+ rec->tsc_cycles = rec->tsc_interval / invoked_cnt;
+ else
+ rec->tsc_cycles = rec->tsc_interval;
+ }
+
+ /* Wall-clock time calc */
+ if (rec->flags & TIME_BENCH_WALLCLOCK) {
+ rec->time_start = rec->ts_start.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_start.tv_sec);
+ rec->time_stop = rec->ts_stop.tv_nsec +
+ (NANOSEC_PER_SEC * rec->ts_stop.tv_sec);
+ rec->time_interval = rec->time_stop - rec->time_start;
+ if (rec->time_interval == 0) {
+ pr_err("ABORT: timing took ZERO wallclock time\n");
+ return false;
+ }
+ /* Calculate stats */
+ /*** Division in kernel it tricky ***/
+ /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */
+ /* remainder only correct because NANOSEC_PER_SEC is 10^9 */
+ rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
+ &rec->time_sec_remainder);
+ //TODO: use existing struct timespec records instead of div?
+
+ if (rec->flags & TIME_BENCH_LOOP) {
+ /*** Division in kernel it tricky ***/
+ /* Orig: ns = ((double)time_interval / invoked_cnt); */
+ /* First get quotient */
+ rec->ns_per_call_quotient =
+ div_u64_rem(rec->time_interval, invoked_cnt,
+ &ns_per_call_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ ns_per_call_tmp_rem = ns_per_call_remainder;
+ invoked_cnt_precision = invoked_cnt / 1000;
+ if (invoked_cnt_precision > 0) {
+ rec->ns_per_call_decimal =
+ div_u64_rem(ns_per_call_tmp_rem,
+ invoked_cnt_precision,
+ &ns_per_call_remainder);
+ }
+ }
+ }
+
+ /* Performance Monitor Unit (PMU) counters */
+ if (rec->flags & TIME_BENCH_PMU) {
+ //FIXME: Overflow handling???
+ rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
+ rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
+
+ /* Calc Instruction Per Cycle (IPC) */
+ /* First get quotient */
+ rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk,
+ &pmc_ipc_remainder);
+ /* Now get decimals .xxx precision (incorrect roundup)*/
+ pmc_ipc_tmp_rem = pmc_ipc_remainder;
+ pmc_ipc_div = rec->pmc_clk / 1000;
+ if (pmc_ipc_div > 0) {
+ rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem,
+ pmc_ipc_div,
+ &pmc_ipc_remainder);
+ }
+ }
+
+ return true;
+}
+
+/* Generic function for invoking a loop function and calculating
+ * execution time stats. The function being called/timed is assumed
+ * to perform a tight loop, and update the timing record struct.
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ struct time_bench_record rec;
+
+ /* Setup record */
+ memset(&rec, 0, sizeof(rec)); /* zero func might not update all */
+ rec.version_abi = 1;
+ rec.loops = loops;
+ rec.step = step;
+ rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);
+
+ /*** Loop function being timed ***/
+ if (!func(&rec, data)) {
+ pr_err("ABORT: function being timed failed\n");
+ return false;
+ }
+
+ if (rec.invoked_cnt < loops)
+ pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n",
+ rec.invoked_cnt, loops);
+
+ /* Calculate stats */
+ time_bench_calc_stats(&rec);
+
+ pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ txt, rec.tsc_cycles, rec.ns_per_call_quotient,
+ rec.ns_per_call_decimal, rec.step, rec.time_sec,
+ rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
+ rec.tsc_interval);
+ if (rec.flags & TIME_BENCH_PMU)
+ pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
+ txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
+ rec.pmc_ipc_decimal);
+ return true;
+}
+
+/* Function getting invoked by kthread */
+static int invoke_test_on_cpu_func(void *private)
+{
+ struct time_bench_cpu *cpu = private;
+ struct time_bench_sync *sync = cpu->sync;
+ cpumask_t newmask = CPU_MASK_NONE;
+ void *data = cpu->data;
+
+ /* Restrict CPU */
+ cpumask_set_cpu(cpu->rec.cpu, &newmask);
+ set_cpus_allowed_ptr(current, &newmask);
+
+ /* Synchronize start of concurrency test */
+ atomic_inc(&sync->nr_tests_running);
+ wait_for_completion(&sync->start_event);
+
+ /* Start benchmark function */
+ if (!cpu->bench_func(&cpu->rec, data)) {
+ pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
+ cpu->rec.cpu, smp_processor_id());
+ } else {
+ if (verbose)
+ pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
+ smp_processor_id());
+ }
+ cpu->did_bench_run = true;
+
+ /* End test */
+ atomic_dec(&sync->nr_tests_running);
+ /* Wait for kthread_stop() telling us to stop */
+ while (!kthread_should_stop()) {
+ set_current_state(TASK_INTERRUPTIBLE);
+ schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+ return 0;
+}
+
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask)
+{
+ uint64_t average = 0;
+ int cpu;
+ int step = 0;
+ struct sum {
+ uint64_t tsc_cycles;
+ int records;
+ } sum = { 0 };
+
+ /* Get stats */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+ struct time_bench_record *rec = &c->rec;
+
+ /* Calculate stats */
+ time_bench_calc_stats(rec);
+
+ pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
+ desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient,
+ rec->ns_per_call_decimal, rec->step, rec->time_sec,
+ rec->time_sec_remainder, rec->time_interval,
+ rec->invoked_cnt, rec->tsc_interval);
+
+ /* Collect average */
+ sum.records++;
+ sum.tsc_cycles += rec->tsc_cycles;
+ step = rec->step;
+ }
+
+ if (sum.records) /* avoid div-by-zero */
+ average = sum.tsc_cycles / sum.records;
+ pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
+ average, sum.records, step);
+}
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync,
+ struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data))
+{
+ int cpu, running = 0;
+
+ if (verbose) // DEBUG
+ pr_warn("%s() Started on CPU:%d\n", __func__,
+ smp_processor_id());
+
+ /* Reset sync conditions */
+ atomic_set(&sync->nr_tests_running, 0);
+ init_completion(&sync->start_event);
+
+ /* Spawn off jobs on all CPUs */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ running++;
+ c->sync = sync; /* Send sync variable along */
+ c->data = data; /* Send opaque along */
+
+ /* Init benchmark record */
+ memset(&c->rec, 0, sizeof(struct time_bench_record));
+ c->rec.version_abi = 1;
+ c->rec.loops = loops;
+ c->rec.step = step;
+ c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
+ TIME_BENCH_WALLCLOCK);
+ c->rec.cpu = cpu;
+ c->bench_func = func;
+ c->task = kthread_run(invoke_test_on_cpu_func, c,
+ "time_bench%d", cpu);
+ if (IS_ERR(c->task)) {
+ pr_err("%s(): Failed to start test func\n", __func__);
+ return; /* Argh, what about cleanup?! */
+ }
+ }
+
+ /* Wait until all processes are running */
+ while (atomic_read(&sync->nr_tests_running) < running) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+ /* Kick off all CPU concurrently on completion event */
+ complete_all(&sync->start_event);
+
+ /* Wait for CPUs to finish */
+ while (atomic_read(&sync->nr_tests_running)) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ schedule_timeout(10);
+ }
+
+ /* Stop the kthreads */
+ for_each_cpu(cpu, mask) {
+ struct time_bench_cpu *c = &cpu_tasks[cpu];
+
+ kthread_stop(c->task);
+ }
+
+ if (verbose) // DEBUG - happens often, finish on another CPU
+ pr_warn("%s() Finished on CPU:%d\n", __func__,
+ smp_processor_id());
+}
diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h
new file mode 100644
index 000000000000..e113fcf341dc
--- /dev/null
+++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h
@@ -0,0 +1,238 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Benchmarking code execution time inside the kernel
+ *
+ * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer
+ * for licensing details see kernel-base/COPYING
+ */
+#ifndef _LINUX_TIME_BENCH_H
+#define _LINUX_TIME_BENCH_H
+
+/* Main structure used for recording a benchmark run */
+struct time_bench_record {
+ uint32_t version_abi;
+ uint32_t loops; /* Requested loop invocations */
+ uint32_t step; /* option for e.g. bulk invocations */
+
+ uint32_t flags; /* Measurements types enabled */
+#define TIME_BENCH_LOOP BIT(0)
+#define TIME_BENCH_TSC BIT(1)
+#define TIME_BENCH_WALLCLOCK BIT(2)
+#define TIME_BENCH_PMU BIT(3)
+
+ uint32_t cpu; /* Used when embedded in time_bench_cpu */
+
+ /* Records */
+ uint64_t invoked_cnt; /* Returned actual invocations */
+ uint64_t tsc_start;
+ uint64_t tsc_stop;
+ struct timespec64 ts_start;
+ struct timespec64 ts_stop;
+ /* PMU counters for instruction and cycles
+ * instructions counter including pipelined instructions
+ */
+ uint64_t pmc_inst_start;
+ uint64_t pmc_inst_stop;
+ /* CPU unhalted clock counter */
+ uint64_t pmc_clk_start;
+ uint64_t pmc_clk_stop;
+
+ /* Result records */
+ uint64_t tsc_interval;
+ uint64_t time_start, time_stop, time_interval; /* in nanosec */
+ uint64_t pmc_inst, pmc_clk;
+
+ /* Derived result records */
+ uint64_t tsc_cycles; // +decimal?
+ uint64_t ns_per_call_quotient, ns_per_call_decimal;
+ uint64_t time_sec;
+ uint32_t time_sec_remainder;
+ uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */
+};
+
+/* For synchronizing parallel CPUs to run concurrently */
+struct time_bench_sync {
+ atomic_t nr_tests_running;
+ struct completion start_event;
+};
+
+/* Keep track of CPUs executing our bench function.
+ *
+ * Embed a time_bench_record for storing info per cpu
+ */
+struct time_bench_cpu {
+ struct time_bench_record rec;
+ struct time_bench_sync *sync; /* back ptr */
+ struct task_struct *task;
+ /* "data" opaque could have been placed in time_bench_sync,
+ * but to avoid any false sharing, place it per CPU
+ */
+ void *data;
+ /* Support masking outsome CPUs, mark if it ran */
+ bool did_bench_run;
+ /* int cpu; // note CPU stored in time_bench_record */
+ int (*bench_func)(struct time_bench_record *record, void *data);
+};
+
+/*
+ * Below TSC assembler code is not compatible with other archs, and
+ * can also fail on guests if cpu-flags are not correct.
+ *
+ * The way TSC reading is used, many iterations, does not require as
+ * high accuracy as described below (in Intel Doc #324264).
+ *
+ * Considering changing to use get_cycles() (#include <asm/timex.h>).
+ */
+
+/** TSC (Time-Stamp Counter) based **
+ * Recommend reading, to understand details of reading TSC accurately:
+ * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel"
+ *
+ * Consider getting exclusive ownership of CPU by using:
+ * unsigned long flags;
+ * preempt_disable();
+ * raw_local_irq_save(flags);
+ * _your_code_
+ * raw_local_irq_restore(flags);
+ * preempt_enable();
+ *
+ * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx"
+ * RDTSC only change "%rax" and "%rdx" but
+ * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx)
+ */
+static __always_inline uint64_t tsc_start_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("CPUID\n\t"
+ "RDTSC\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ //FIXME: on 32bit use clobbered %eax + %edx
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+static __always_inline uint64_t tsc_stop_clock(void)
+{
+ /* See: Intel Doc #324264 */
+ unsigned int hi, lo;
+
+ asm volatile("RDTSCP\n\t"
+ "mov %%edx, %0\n\t"
+ "mov %%eax, %1\n\t"
+ "CPUID\n\t"
+ : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx");
+ return ((uint64_t)lo) | (((uint64_t)hi) << 32);
+}
+
+/** Wall-clock based **
+ *
+ * use: getnstimeofday()
+ * getnstimeofday(&rec->ts_start);
+ * getnstimeofday(&rec->ts_stop);
+ *
+ * API changed see: Documentation/core-api/timekeeping.rst
+ * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday
+ *
+ * We should instead use: ktime_get_real_ts64() is a direct
+ * replacement, but consider using monotonic time (ktime_get_ts64())
+ * and/or a ktime_t based interface (ktime_get()/ktime_get_real()).
+ */
+
+/** PMU (Performance Monitor Unit) based **
+ *
+ * Needed for calculating: Instructions Per Cycle (IPC)
+ * - The IPC number tell how efficient the CPU pipelining were
+ */
+//lookup: perf_event_create_kernel_counter()
+
+bool time_bench_PMU_config(bool enable);
+
+/* Raw reading via rdpmc() using fixed counters
+ *
+ * From: https://github.com/andikleen/simple-pmu
+ */
+enum {
+ FIXED_SELECT = (1U << 30), /* == 0x40000000 */
+ FIXED_INST_RETIRED_ANY = 0,
+ FIXED_CPU_CLK_UNHALTED_CORE = 1,
+ FIXED_CPU_CLK_UNHALTED_REF = 2,
+};
+
+static __always_inline unsigned int long long p_rdpmc(unsigned int in)
+{
+ unsigned int d, a;
+
+ asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory");
+ return ((unsigned long long)d << 32) | a;
+}
+
+/* These PMU counter needs to be enabled, but I don't have the
+ * configure code implemented. My current hack is running:
+ * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko
+ */
+/* Reading all pipelined instruction */
+static __always_inline unsigned long long pmc_inst(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY);
+}
+
+/* Reading CPU clock cycles */
+static __always_inline unsigned long long pmc_clk(void)
+{
+ return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE);
+}
+
+/* Raw reading via MSR rdmsr() is likely wrong
+ * FIXME: How can I know which raw MSR registers are conf for what?
+ */
+#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */
+#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */
+#define MSR_IA32_PCM2 0x400000C3
+static inline uint64_t msr_inst(unsigned long long *msr_result)
+{
+ return rdmsrq_safe(MSR_IA32_PCM0, msr_result);
+}
+
+/** Generic functions **
+ */
+bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
+ int (*func)(struct time_bench_record *rec, void *data));
+bool time_bench_calc_stats(struct time_bench_record *rec);
+
+void time_bench_run_concurrent(uint32_t loops, int step, void *data,
+ const struct cpumask *mask, /* Support masking outsome CPUs*/
+ struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks,
+ int (*func)(struct time_bench_record *record, void *data));
+void time_bench_print_stats_cpumask(const char *desc,
+ struct time_bench_cpu *cpu_tasks,
+ const struct cpumask *mask);
+
+//FIXME: use rec->flags to select measurement, should be MACRO
+static __always_inline void time_bench_start(struct time_bench_record *rec)
+{
+ //getnstimeofday(&rec->ts_start);
+ ktime_get_real_ts64(&rec->ts_start);
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_start = pmc_inst();
+ rec->pmc_clk_start = pmc_clk();
+ }
+ rec->tsc_start = tsc_start_clock();
+}
+
+static __always_inline void time_bench_stop(struct time_bench_record *rec,
+ uint64_t invoked_cnt)
+{
+ rec->tsc_stop = tsc_stop_clock();
+ if (rec->flags & TIME_BENCH_PMU) {
+ rec->pmc_inst_stop = pmc_inst();
+ rec->pmc_clk_stop = pmc_clk();
+ }
+ //getnstimeofday(&rec->ts_stop);
+ ktime_get_real_ts64(&rec->ts_stop);
+ rec->invoked_cnt = invoked_cnt;
+}
+
+#endif /* _LINUX_TIME_BENCH_H */
diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
new file mode 100755
index 000000000000..7b8b18cfedce
--- /dev/null
+++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+set -e
+
+DRIVER="./page_pool/bench_page_pool.ko"
+result=""
+
+function run_test()
+{
+ rmmod "bench_page_pool.ko" || true
+ insmod $DRIVER > /dev/null 2>&1
+ result=$(dmesg | tail -10)
+ echo "$result"
+
+ echo
+ echo "Fast path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "ptr_ring results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+
+ echo
+ echo "slow path results:"
+ echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns"
+}
+
+run_test
+
+exit 0
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
index 57ff67a3751e..da04b0b19b73 100644
--- a/tools/testing/selftests/net/bind_bhash.c
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -75,7 +75,7 @@ static void *setup(void *arg)
int *array = (int *)arg;
for (i = 0; i < MAX_CONNECTIONS; i++) {
- sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ sock_fd = bind_socket(SO_REUSEPORT, setup_addr);
if (sock_fd < 0) {
ret = sock_fd;
pthread_exit(&ret);
@@ -103,7 +103,7 @@ int main(int argc, const char *argv[])
setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
- listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ listener_fd = bind_socket(SO_REUSEPORT, setup_addr);
if (listen(listener_fd, 100) < 0) {
perror("listen failed");
return -1;
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
index cb9fdf51ea59..40126f9b901e 100644
--- a/tools/testing/selftests/net/bind_timewait.c
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -4,7 +4,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(bind_timewait)
{
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index b7b54d646b93..7d11548b2c61 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -4,7 +4,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const __u32 in4addr_any = INADDR_ANY;
static const __u32 in4addr_loopback = INADDR_LOOPBACK;
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
index d10f420e4ef6..c856d266c8f3 100755
--- a/tools/testing/selftests/net/bpf_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
progs = [ p for p in progs if not p['orphaned'] ]
if expected is not None:
if len(progs) != expected:
- fail(True, "%d BPF programs loaded, expected %d" %
- (len(progs), expected))
+ fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" %
+ (len(progs), expected, pp.pformat(progs)))
return progs
def bpftool_map_list(expected=None, ns=""):
@@ -207,20 +207,24 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
+ nmaps = None
for i in range(n_retry):
maps = bpftool_map_list(ns=ns)
- if len(maps) == expected:
+ nmaps = len(maps)
+ if nmaps == expected:
return maps
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
- fail=True, include_stderr=False):
+ fail=True, include_stderr=False, dev_bind=None):
args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
if prog_type is not None:
args += " type " + prog_type
if dev is not None:
args += " dev " + dev
+ elif dev_bind is not None:
+ args += " xdpmeta_dev " + dev_bind
if len(maps):
args += " map " + " map ".join(maps)
@@ -708,6 +712,7 @@ _, base_maps = bpftool("map")
base_map_names = [
'pid_iter.rodata', # created on each bpftool invocation
'libbpf_det_bind', # created on each bpftool invocation
+ 'libbpf_global',
]
# Check netdevsim
@@ -980,6 +985,16 @@ try:
rm("/sys/fs/bpf/offload")
sim.wait_for_flush()
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound",
+ dev_bind=sim['ifname'])
+ devbound = bpf_pinned("/sys/fs/bpf/devbound")
+ start_test("Test dev-bound program in generic mode...")
+ ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True)
+ fail(ret == 0, "devbound program in generic mode allowed")
+ check_extack(err, "Can't attach device-bound programs in generic mode.", args)
+ rm("/sys/fs/bpf/devbound")
+ sim.wait_for_flush()
+
start_test("Test XDP load failure...")
sim.dfs["dev/bpf_bind_verifier_accept"] = 0
ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh
new file mode 100755
index 000000000000..334a7eca8a80
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_ether_dst.sh
@@ -0,0 +1,83 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Author: Brett A C Sheffield <bacs@librecast.net>
+# Author: Oscar Maes <oscmaes92@gmail.com>
+#
+# Ensure destination ethernet field is correctly set for
+# broadcast packets
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+GW_IP4="192.168.0.2"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth \
+ peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+
+ ip -net "${CLIENT_NS}" route add default via "${GW_IP4}"
+ ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 00:11:22:33:44:55
+}
+
+cleanup() {
+ rm -f "${CAPFILE}" "${OUTPUT}"
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+test_broadcast_ether_dst() {
+ local rc=0
+ CAPFILE=$(mktemp -u cap.XXXXXXXXXX)
+ OUTPUT=$(mktemp -u out.XXXXXXXXXX)
+
+ echo "Testing ethernet broadcast destination"
+
+ # start tcpdump listening for icmp
+ # tcpdump will exit after receiving a single packet
+ # timeout will kill tcpdump if it is still running after 2s
+ timeout 2s ip netns exec "${CLIENT_NS}" \
+ tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" &
+ pid=$!
+ slowwait 1 grep -qs "listening" "${OUTPUT}"
+
+ # send broadcast ping
+ ip netns exec "${CLIENT_NS}" \
+ ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null
+
+ # wait for tcpdump for exit after receiving packet
+ wait "${pid}"
+
+ # compare ethernet destination field to ff:ff:ff:ff:ff:ff
+ ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \
+ awk '{sub(/,/,"",$3); print $3}')
+ if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \
+ "got ${ether_dst}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+test_broadcast_ether_dst
+
+exit $?
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh
new file mode 100755
index 000000000000..726eb5d25839
--- /dev/null
+++ b/tools/testing/selftests/net/broadcast_pmtu.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Ensures broadcast route MTU is respected
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="192.168.0.1/24"
+CLIENT_BROADCAST_ADDRESS="192.168.0.255"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="192.168.0.2/24"
+
+setup() {
+ ip netns add "${CLIENT_NS}"
+ ip netns add "${SERVER_NS}"
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" link set link0 mtu 9000
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" link set link1 mtu 1500
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1
+
+ read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)"
+ ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}"
+ ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500
+
+ ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ ip netns del "${CLIENT_NS}"
+ ip netns del "${SERVER_NS}"
+}
+
+trap cleanup EXIT
+
+setup &&
+ echo "Testing for broadcast route MTU" &&
+ ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1
+
+exit $?
+
diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh
index 7db292ec4884..5ec1c85c1623 100755
--- a/tools/testing/selftests/net/busy_poll_test.sh
+++ b/tools/testing/selftests/net/busy_poll_test.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
NSIM_SV_ID=$((256 + RANDOM % 256))
NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
@@ -27,6 +27,8 @@ NAPI_DEFER_HARD_IRQS=100
GRO_FLUSH_TIMEOUT=50000
SUSPEND_TIMEOUT=20000000
+NAPI_THREADED_MODE_BUSY_POLL=2
+
setup_ns()
{
set -e
@@ -62,6 +64,9 @@ cleanup_ns()
test_busypoll()
{
suspend_value=${1:-0}
+ napi_threaded_value=${2:-0}
+ prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL}
+
tmp_file=$(mktemp)
out_file=$(mktemp)
@@ -73,10 +78,11 @@ test_busypoll()
-b${SERVER_IP} \
-m${MAX_EVENTS} \
-u${BUSY_POLL_USECS} \
- -P${PREFER_BUSY_POLL} \
+ -P${prefer_busy_poll_value} \
-g${BUSY_POLL_BUDGET} \
-i${NSIM_SV_IFIDX} \
-s${suspend_value} \
+ -t${napi_threaded_value} \
-o${out_file}&
wait_local_port_listen nssv ${SERVER_PORT} tcp
@@ -109,6 +115,15 @@ test_busypoll_with_suspend()
return $?
}
+test_busypoll_with_napi_threaded()
+{
+ # Only enable napi threaded poll. Set suspend timeout and prefer busy
+ # poll to 0.
+ test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0
+
+ return $?
+}
+
###
### Code start
###
@@ -154,6 +169,13 @@ if [ $? -ne 0 ]; then
exit 1
fi
+test_busypoll_with_napi_threaded
+if [ $? -ne 0 ]; then
+ echo "test_busypoll_with_napi_threaded failed"
+ cleanup_ns
+ exit 1
+fi
+
echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 99b0e8c17fca..3a81f9c94795 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -54,63 +54,74 @@ struct epoll_params {
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif
-static uint32_t cfg_port = 8000;
+static uint16_t cfg_port = 8000;
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
static char *cfg_outfile;
static int cfg_max_events = 8;
-static int cfg_ifindex;
+static uint32_t cfg_ifindex;
/* busy poll params */
static uint32_t cfg_busy_poll_usecs;
-static uint32_t cfg_busy_poll_budget;
-static uint32_t cfg_prefer_busy_poll;
+static uint16_t cfg_busy_poll_budget;
+static uint8_t cfg_prefer_busy_poll;
-/* IRQ params */
+/* NAPI params */
static uint32_t cfg_defer_hard_irqs;
static uint64_t cfg_gro_flush_timeout;
static uint64_t cfg_irq_suspend_timeout;
+static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED;
static void usage(const char *filepath)
{
error(1, 0,
- "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -i<ifindex>",
+ "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>",
filepath);
}
static void parse_opts(int argc, char **argv)
{
+ unsigned long long tmp;
int ret;
int c;
if (argc <= 1)
usage(argv[0]);
- while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+ while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) {
+ /* most options take integer values, except o and b, so reduce
+ * code duplication a bit for the common case by calling
+ * strtoull here and leave bounds checking and casting per
+ * option below.
+ */
+ if (c != 'o' && c != 'b')
+ tmp = strtoull(optarg, NULL, 0);
+
switch (c) {
case 'u':
- cfg_busy_poll_usecs = strtoul(optarg, NULL, 0);
- if (cfg_busy_poll_usecs == ULONG_MAX ||
- cfg_busy_poll_usecs > UINT32_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
error(1, ERANGE, "busy_poll_usecs too large");
+
+ cfg_busy_poll_usecs = (uint32_t)tmp;
break;
case 'P':
- cfg_prefer_busy_poll = strtoul(optarg, NULL, 0);
- if (cfg_prefer_busy_poll == ULONG_MAX ||
- cfg_prefer_busy_poll > 1)
+ if (tmp == ULLONG_MAX || tmp > 1)
error(1, ERANGE,
"prefer busy poll should be 0 or 1");
+
+ cfg_prefer_busy_poll = (uint8_t)tmp;
break;
case 'g':
- cfg_busy_poll_budget = strtoul(optarg, NULL, 0);
- if (cfg_busy_poll_budget == ULONG_MAX ||
- cfg_busy_poll_budget > UINT16_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
error(1, ERANGE,
"busy poll budget must be [0, UINT16_MAX]");
+
+ cfg_busy_poll_budget = (uint16_t)tmp;
break;
case 'p':
- cfg_port = strtoul(optarg, NULL, 0);
- if (cfg_port > UINT16_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
error(1, ERANGE, "port must be <= 65535");
+
+ cfg_port = (uint16_t)tmp;
break;
case 'b':
ret = inet_aton(optarg, &cfg_bind_addr);
@@ -124,41 +135,45 @@ static void parse_opts(int argc, char **argv)
error(1, 0, "outfile invalid");
break;
case 'm':
- cfg_max_events = strtol(optarg, NULL, 0);
-
- if (cfg_max_events == LONG_MIN ||
- cfg_max_events == LONG_MAX ||
- cfg_max_events <= 0)
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
error(1, ERANGE,
- "max events must be > 0 and < LONG_MAX");
+ "max events must be > 0 and <= INT_MAX");
+
+ cfg_max_events = (int)tmp;
break;
case 'd':
- cfg_defer_hard_irqs = strtoul(optarg, NULL, 0);
-
- if (cfg_defer_hard_irqs == ULONG_MAX ||
- cfg_defer_hard_irqs > INT32_MAX)
+ if (tmp == ULLONG_MAX || tmp > INT32_MAX)
error(1, ERANGE,
"defer_hard_irqs must be <= INT32_MAX");
+
+ cfg_defer_hard_irqs = (uint32_t)tmp;
break;
case 'r':
- cfg_gro_flush_timeout = strtoull(optarg, NULL, 0);
-
- if (cfg_gro_flush_timeout == ULLONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
error(1, ERANGE,
- "gro_flush_timeout must be < ULLONG_MAX");
+ "gro_flush_timeout must be < UINT64_MAX");
+
+ cfg_gro_flush_timeout = (uint64_t)tmp;
break;
case 's':
- cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0);
-
- if (cfg_irq_suspend_timeout == ULLONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
error(1, ERANGE,
"irq_suspend_timeout must be < ULLONG_MAX");
+
+ cfg_irq_suspend_timeout = (uint64_t)tmp;
break;
case 'i':
- cfg_ifindex = strtoul(optarg, NULL, 0);
- if (cfg_ifindex == ULONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
error(1, ERANGE,
- "ifindex must be < ULONG_MAX");
+ "ifindex must be <= INT_MAX");
+
+ cfg_ifindex = (int)tmp;
+ break;
+ case 't':
+ if (tmp > 2)
+ error(1, ERANGE, "napi threaded poll value must be 0-2");
+
+ cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp;
break;
}
}
@@ -215,7 +230,7 @@ static void setup_queue(void)
struct netdev_napi_set_req *set_req = NULL;
struct ynl_sock *ys;
struct ynl_error yerr;
- uint32_t napi_id;
+ uint32_t napi_id = 0;
ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!ys)
@@ -239,6 +254,9 @@ static void setup_queue(void)
netdev_napi_set_req_set_irq_suspend_timeout(set_req,
cfg_irq_suspend_timeout);
+ if (cfg_napi_threaded_poll)
+ netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll);
+
if (netdev_napi_set(ys, set_req))
error(1, 0, "can't set NAPI params: %s\n", yerr.msg);
@@ -277,8 +295,8 @@ static void run_poller(void)
* here
*/
epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
- epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
- epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
+ epoll_params.busy_poll_budget = cfg_busy_poll_budget;
+ epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
epoll_params.__pad = 0;
val = 1;
@@ -342,5 +360,9 @@ int main(int argc, char *argv[])
parse_opts(argc, argv);
setup_queue();
run_poller();
+
+ if (cfg_outfile)
+ free(cfg_outfile);
+
return 0;
}
diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore
new file mode 100644
index 000000000000..764a53fc837f
--- /dev/null
+++ b/tools/testing/selftests/net/can/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+test_raw_filter
diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile
new file mode 100644
index 000000000000..5b82e60a03e7
--- /dev/null
+++ b/tools/testing/selftests/net/can/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := test_raw_filter.sh
+
+TEST_GEN_FILES := test_raw_filter
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config
new file mode 100644
index 000000000000..188f79796670
--- /dev/null
+++ b/tools/testing/selftests/net/can/config
@@ -0,0 +1,3 @@
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VCAN=m
diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c
new file mode 100644
index 000000000000..bb8ae8854273
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.c
@@ -0,0 +1,405 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+/*
+ * Copyright (c) 2011 Volkswagen Group Electronic Research
+ * All rights reserved.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <net/if.h>
+#include <linux/if.h>
+
+#include <linux/can.h>
+#include <linux/can/raw.h>
+
+#include "kselftest_harness.h"
+
+#define ID 0x123
+
+char CANIF[IFNAMSIZ];
+
+static int send_can_frames(int sock, int testcase)
+{
+ struct can_frame frame;
+
+ frame.can_dlc = 1;
+ frame.data[0] = testcase;
+
+ frame.can_id = ID;
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG);
+ if (write(sock, &frame, sizeof(frame)) < 0)
+ goto write_err;
+
+ return 0;
+
+write_err:
+ perror("write");
+ return 1;
+}
+
+FIXTURE(can_filters) {
+ int sock;
+};
+
+FIXTURE_SETUP(can_filters)
+{
+ struct sockaddr_can addr;
+ struct ifreq ifr;
+ int recv_own_msgs = 1;
+ int s, ret;
+
+ s = socket(PF_CAN, SOCK_RAW, CAN_RAW);
+ ASSERT_GE(s, 0)
+ TH_LOG("failed to create CAN_RAW socket: %d", errno);
+
+ strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name));
+ ret = ioctl(s, SIOCGIFINDEX, &ifr);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed SIOCGIFINDEX: %d", errno);
+
+ addr.can_family = AF_CAN;
+ addr.can_ifindex = ifr.ifr_ifindex;
+
+ setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS,
+ &recv_own_msgs, sizeof(recv_own_msgs));
+
+ ret = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed bind socket: %d", errno);
+
+ self->sock = s;
+}
+
+FIXTURE_TEARDOWN(can_filters)
+{
+ close(self->sock);
+}
+
+FIXTURE_VARIANT(can_filters) {
+ int testcase;
+ canid_t id;
+ canid_t mask;
+ int exp_num_rx;
+ canid_t exp_flags[];
+};
+
+/* Receive all frames when filtering for the ID in standard frame format */
+FIXTURE_VARIANT_ADD(can_filters, base) {
+ .testcase = 1,
+ .id = ID,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_eff) {
+ .testcase = 2,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore RTR flag in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_rtr) {
+ .testcase = 3,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */
+FIXTURE_VARIANT_ADD(can_filters, base_effrtr) {
+ .testcase = 4,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK,
+ .exp_num_rx = 4,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff) {
+ .testcase = 5,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) {
+ .testcase = 6,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) {
+ .testcase = 7,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF frames when filter id and filter mask include EFF flag,
+ * ignoring RTR flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) {
+ .testcase = 8,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr) {
+ .testcase = 9,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) {
+ .testcase = 10,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ 0,
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) {
+ .testcase = 11,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only remote frames when filter includes RTR flag, ignoring EFF
+ * flag
+ */
+FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) {
+ .testcase = 12,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_RTR_FLAG,
+ .exp_num_rx = 2,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no flags */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) {
+ .testcase = 13,
+ .id = ID,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF but no RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) {
+ .testcase = 14,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* Receive only SFF remote frame when filtering for RTR but no EFF flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) {
+ .testcase = 15,
+ .id = ID | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only EFF remote frame when filtering for EFF and RTR flag */
+FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) {
+ .testcase = 16,
+ .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG | CAN_RTR_FLAG,
+ },
+};
+
+/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag
+ * but based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff) {
+ .testcase = 17,
+ .id = ID,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ 0,
+ },
+};
+
+/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but
+ * based on EFF mask
+ */
+FIXTURE_VARIANT_ADD(can_filters, eff_eff) {
+ .testcase = 18,
+ .id = ID | CAN_EFF_FLAG,
+ .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG,
+ .exp_num_rx = 1,
+ .exp_flags = {
+ CAN_EFF_FLAG,
+ },
+};
+
+/* This test verifies that the raw CAN filters work, by checking if only frames
+ * with the expected set of flags are received. For each test case, the given
+ * filter (id and mask) is added and four CAN frames are sent with every
+ * combination of set/unset EFF/RTR flags.
+ */
+TEST_F(can_filters, test_filter)
+{
+ struct can_filter rfilter;
+ int ret;
+
+ rfilter.can_id = variant->id;
+ rfilter.can_mask = variant->mask;
+ setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER,
+ &rfilter, sizeof(rfilter));
+
+ TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X",
+ rfilter.can_id, rfilter.can_mask);
+
+ ret = send_can_frames(self->sock, variant->testcase);
+ ASSERT_EQ(ret, 0)
+ TH_LOG("failed to send CAN frames");
+
+ for (int i = 0; i <= variant->exp_num_rx; i++) {
+ struct can_frame frame;
+ struct timeval tv = {
+ .tv_sec = 0,
+ .tv_usec = 50000, /* 50ms timeout */
+ };
+ fd_set rdfs;
+
+ FD_ZERO(&rdfs);
+ FD_SET(self->sock, &rdfs);
+
+ ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv);
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed select for frame %d, err: %d)", i, errno);
+
+ ret = FD_ISSET(self->sock, &rdfs);
+ if (i == variant->exp_num_rx) {
+ ASSERT_EQ(ret, 0)
+ TH_LOG("too many frames received");
+ } else {
+ ASSERT_NE(ret, 0)
+ TH_LOG("too few frames received");
+
+ ret = read(self->sock, &frame, sizeof(frame));
+ ASSERT_GE(ret, 0)
+ TH_LOG("failed to read frame %d, err: %d", i, errno);
+
+ TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i);
+
+ ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK)
+ TH_LOG("received wrong can_id");
+ ASSERT_EQ(variant->testcase, frame.data[0])
+ TH_LOG("received wrong test case");
+
+ ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK,
+ variant->exp_flags[i])
+ TH_LOG("received unexpected flags");
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ char *ifname = getenv("CANIF");
+
+ if (!ifname) {
+ printf("CANIF environment variable must contain the test interface\n");
+ return KSFT_FAIL;
+ }
+
+ strncpy(CANIF, ifname, sizeof(CANIF) - 1);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh
new file mode 100755
index 000000000000..276d6c06ac95
--- /dev/null
+++ b/tools/testing/selftests/net/can/test_raw_filter.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_raw_filter
+"
+
+net_dir=$(dirname $0)/..
+source $net_dir/lib.sh
+
+export CANIF=${CANIF:-"vcan0"}
+BITRATE=${BITRATE:-500000}
+
+setup()
+{
+ if [[ $CANIF == vcan* ]]; then
+ ip link add name $CANIF type vcan || exit $ksft_skip
+ else
+ ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip
+ fi
+ ip link set dev $CANIF up
+ pwd
+}
+
+cleanup()
+{
+ ip link set dev $CANIF down
+ if [[ $CANIF == vcan* ]]; then
+ ip link delete $CANIF
+ fi
+}
+
+test_raw_filter()
+{
+ ./test_raw_filter
+ check_err $?
+ log_test "test_raw_filter"
+}
+
+trap cleanup EXIT
+setup
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh
new file mode 100755
index 000000000000..b55680e081ad
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ip.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+TMPF=$(mktemp --suffix ".pcap")
+
+cleanup()
+{
+ rm -f $TMPF
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP - tcpdump with --immediate-mode option required"
+ exit $ksft_skip
+fi
+
+# Namespaces
+setup_ns NS
+NSEXE="ip netns exec $NS"
+
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne $2 ]; then
+ echo " Case $3 returned $1, expected $2"
+ ((BAD++))
+ fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+ for df in 0 1; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-F $df"
+ [ $ovr == "cmsg" ] && m="-f $df"
+ [ $ovr == "both" ] && m="-F $df -f $df"
+ [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
+
+ $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+ check_result $? $df "DONTFRAG $prot $ovr"
+ done
+ done
+done
+
+# IP_TOS + IPV6_TCLASS
+
+test_dscp() {
+ local -r IPVER=$1
+ local -r TGT=$2
+ local -r MATCH=$3
+
+ local -r TOS=0x10
+ local -r TOS2=0x20
+ local -r ECN=0x3
+
+ ip $IPVER -netns $NS rule add tos $TOS lookup 300
+ ip $IPVER -netns $NS route add table 300 prohibit any
+
+ for ovr in setsock cmsg both diff; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-C"
+ [ $ovr == "cmsg" ] && m="-c"
+ [ $ovr == "both" ] && m="-C $((TOS2)) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS )) -c"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+ check_result $? 0 "$MATCH $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null
+ check_result $? 0 "$MATCH $prot $ovr - packet data"
+ rm $TMPF
+
+ [ $ovr == "both" ] && m="-C $((TOS )) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
+
+ # Match prohibit rule: expect failure
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234
+ check_result $? 1 "$MATCH $prot $ovr - rejection"
+
+ # Match prohibit rule: IPv4 masks ECN: expect failure
+ if [[ "$IPVER" == "-4" ]]; then
+ $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234
+ check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)"
+ fi
+ done
+ done
+}
+
+test_dscp -4 $TGT4 tos
+test_dscp -6 $TGT6 class
+
+# IP_TTL + IPV6_HOPLIMIT
+test_ttl_hoplimit() {
+ local -r IPVER=$1
+ local -r TGT=$2
+ local -r MATCH=$3
+
+ local -r LIM=4
+
+ for ovr in setsock cmsg both diff; do
+ for p in u U i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "U" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-L"
+ [ $ovr == "cmsg" ] && m="-l"
+ [ $ovr == "both" ] && m="-L $LIM -l"
+ [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+ check_result $? 0 "$MATCH $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null
+ check_result $? 0 "$MATCH $prot $ovr - packet data"
+ rm $TMPF
+ done
+ done
+}
+
+test_ttl_hoplimit -4 $TGT4 ttl
+test_ttl_hoplimit -6 $TGT6 hlim
+
+# IPV6 exthdr
+for p in u U i r; do
+ # Very basic "does it crash" test
+ for h in h d r; do
+ $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+ check_result $? 0 "ExtHdr $prot $ovr - pass"
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
deleted file mode 100755
index 8bc23fb4c82b..000000000000
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ /dev/null
@@ -1,154 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-source lib.sh
-
-IP6=2001:db8:1::1/64
-TGT6=2001:db8:1::2
-TMPF=$(mktemp --suffix ".pcap")
-
-cleanup()
-{
- rm -f $TMPF
- cleanup_ns $NS
-}
-
-trap cleanup EXIT
-
-tcpdump -h | grep immediate-mode >> /dev/null
-if [ $? -ne 0 ]; then
- echo "SKIP - tcpdump with --immediate-mode option required"
- exit $ksft_skip
-fi
-
-# Namespaces
-setup_ns NS
-NSEXE="ip netns exec $NS"
-
-$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
-
-# Connectivity
-ip -netns $NS link add type dummy
-ip -netns $NS link set dev dummy0 up
-ip -netns $NS addr add $IP6 dev dummy0
-
-# Test
-BAD=0
-TOTAL=0
-
-check_result() {
- ((TOTAL++))
- if [ $1 -ne $2 ]; then
- echo " Case $3 returned $1, expected $2"
- ((BAD++))
- fi
-}
-
-# IPV6_DONTFRAG
-for ovr in setsock cmsg both diff; do
- for df in 0 1; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-F $df"
- [ $ovr == "cmsg" ] && m="-f $df"
- [ $ovr == "both" ] && m="-F $df -f $df"
- [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
-
- $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
- check_result $? $df "DONTFRAG $prot $ovr"
- done
- done
-done
-
-# IPV6_TCLASS
-TOS=0x10
-TOS2=0x20
-
-ip -6 -netns $NS rule add tos $TOS lookup 300
-ip -6 -netns $NS route add table 300 prohibit any
-
-for ovr in setsock cmsg both diff; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-C"
- [ $ovr == "cmsg" ] && m="-c"
- [ $ovr == "both" ] && m="-C $((TOS2)) -c"
- [ $ovr == "diff" ] && m="-C $((TOS )) -c"
-
- $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
- BG=$!
- sleep 0.05
-
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
- check_result $? 0 "TCLASS $prot $ovr - pass"
-
- while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
- done
-
- tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
- check_result $? 0 "TCLASS $prot $ovr - packet data"
- rm $TMPF
-
- [ $ovr == "both" ] && m="-C $((TOS )) -c"
- [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
-
- $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
- check_result $? 1 "TCLASS $prot $ovr - rejection"
- done
-done
-
-# IPV6_HOPLIMIT
-LIM=4
-
-for ovr in setsock cmsg both diff; do
- for p in u i r; do
- [ $p == "u" ] && prot=UDP
- [ $p == "i" ] && prot=ICMP
- [ $p == "r" ] && prot=RAW
-
- [ $ovr == "setsock" ] && m="-L"
- [ $ovr == "cmsg" ] && m="-l"
- [ $ovr == "both" ] && m="-L $LIM -l"
- [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
-
- $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
- BG=$!
- sleep 0.05
-
- $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
- check_result $? 0 "HOPLIMIT $prot $ovr - pass"
-
- while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
- done
-
- tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
- check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
- rm $TMPF
- done
-done
-
-# IPV6 exthdr
-for p in u i r; do
- # Very basic "does it crash" test
- for h in h d r; do
- $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
- check_result $? 0 "ExtHdr $prot $ovr - pass"
- done
-done
-
-# Summary
-if [ $BAD -ne 0 ]; then
- echo "FAIL - $BAD/$TOTAL cases failed"
- exit 1
-else
- echo "OK"
- exit 0
-fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 876c2db02a63..67a72b1a2f3d 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -16,7 +16,7 @@
#include <linux/udp.h>
#include <sys/socket.h>
-#include "../kselftest.h"
+#include "kselftest.h"
enum {
ERN_SUCCESS = 0,
@@ -33,6 +33,7 @@ enum {
ERN_RECVERR,
ERN_CMSG_RD,
ERN_CMSG_RCV,
+ ERN_SEND_MORE,
};
struct option_cmsg_u32 {
@@ -46,6 +47,7 @@ struct options {
const char *service;
unsigned int size;
unsigned int num_pkt;
+ bool msg_more;
struct {
unsigned int mark;
unsigned int dontfrag;
@@ -59,6 +61,7 @@ struct options {
unsigned int proto;
} sock;
struct option_cmsg_u32 mark;
+ struct option_cmsg_u32 priority;
struct {
bool ena;
unsigned int delay;
@@ -71,7 +74,7 @@ struct options {
struct option_cmsg_u32 tclass;
struct option_cmsg_u32 hlimit;
struct option_cmsg_u32 exthdr;
- } v6;
+ } cmsg;
} opt = {
.size = 13,
.num_pkt = 1,
@@ -93,21 +96,24 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\t\t-S send() size\n"
"\t\t-4/-6 Force IPv4 / IPv6 only\n"
"\t\t-p prot Socket protocol\n"
- "\t\t (u = UDP (default); i = ICMP; r = RAW)\n"
+ "\t\t (u = UDP (default); i = ICMP; r = RAW;\n"
+ "\t\t U = UDP with MSG_MORE)\n"
"\n"
"\t\t-m val Set SO_MARK with given value\n"
"\t\t-M val Set SO_MARK via setsockopt\n"
+ "\t\t-P val Set SO_PRIORITY via setsockopt\n"
+ "\t\t-Q val Set SO_PRIORITY via cmsg\n"
"\t\t-d val Set SO_TXTIME with given delay (usec)\n"
"\t\t-t Enable time stamp reporting\n"
"\t\t-f val Set don't fragment via cmsg\n"
"\t\t-F val Set don't fragment via setsockopt\n"
- "\t\t-c val Set TCLASS via cmsg\n"
- "\t\t-C val Set TCLASS via setsockopt\n"
- "\t\t-l val Set HOPLIMIT via cmsg\n"
- "\t\t-L val Set HOPLIMIT via setsockopt\n"
+ "\t\t-c val Set TOS/TCLASS via cmsg\n"
+ "\t\t-C val Set TOS/TCLASS via setsockopt\n"
+ "\t\t-l val Set TTL/HOPLIMIT via cmsg\n"
+ "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n"
"\t\t-H type Add an IPv6 header option\n"
- "\t\t (h = HOP; d = DST; r = RTDST)"
- "");
+ "\t\t (h = HOP; d = DST; r = RTDST)\n"
+ "\n");
exit(ERN_HELP);
}
@@ -115,7 +121,7 @@ static void cs_parse_args(int argc, char *argv[])
{
int o;
- while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) {
switch (o) {
case 's':
opt.silent_send = true;
@@ -130,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[])
opt.sock.family = AF_INET6;
break;
case 'p':
- if (*optarg == 'u' || *optarg == 'U') {
+ if (*optarg == 'u') {
opt.sock.proto = IPPROTO_UDP;
+ } else if (*optarg == 'U') {
+ opt.sock.proto = IPPROTO_UDP;
+ opt.msg_more = true;
} else if (*optarg == 'i' || *optarg == 'I') {
opt.sock.proto = IPPROTO_ICMP;
} else if (*optarg == 'r') {
@@ -148,6 +157,10 @@ static void cs_parse_args(int argc, char *argv[])
opt.mark.ena = true;
opt.mark.val = atoi(optarg);
break;
+ case 'Q':
+ opt.priority.ena = true;
+ opt.priority.val = atoi(optarg);
+ break;
case 'M':
opt.sockopt.mark = atoi(optarg);
break;
@@ -162,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[])
opt.ts.ena = true;
break;
case 'f':
- opt.v6.dontfrag.ena = true;
- opt.v6.dontfrag.val = atoi(optarg);
+ opt.cmsg.dontfrag.ena = true;
+ opt.cmsg.dontfrag.val = atoi(optarg);
break;
case 'F':
opt.sockopt.dontfrag = atoi(optarg);
break;
case 'c':
- opt.v6.tclass.ena = true;
- opt.v6.tclass.val = atoi(optarg);
+ opt.cmsg.tclass.ena = true;
+ opt.cmsg.tclass.val = atoi(optarg);
break;
case 'C':
opt.sockopt.tclass = atoi(optarg);
break;
case 'l':
- opt.v6.hlimit.ena = true;
- opt.v6.hlimit.val = atoi(optarg);
+ opt.cmsg.hlimit.ena = true;
+ opt.cmsg.hlimit.val = atoi(optarg);
break;
case 'L':
opt.sockopt.hlimit = atoi(optarg);
break;
case 'H':
- opt.v6.exthdr.ena = true;
+ opt.cmsg.exthdr.ena = true;
switch (optarg[0]) {
case 'h':
- opt.v6.exthdr.val = IPV6_HOPOPTS;
+ opt.cmsg.exthdr.val = IPV6_HOPOPTS;
break;
case 'd':
- opt.v6.exthdr.val = IPV6_DSTOPTS;
+ opt.cmsg.exthdr.val = IPV6_DSTOPTS;
break;
case 'r':
- opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+ opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS;
break;
default:
printf("Error: hdr type: %s\n", optarg);
@@ -253,11 +266,21 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
SOL_SOCKET, SO_MARK, &opt.mark);
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
- ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
- SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
+ SOL_SOCKET, SO_PRIORITY, &opt.priority);
+
+ if (opt.sock.family == AF_INET) {
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IP, IP_TOS, &opt.cmsg.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IP, IP_TTL, &opt.cmsg.hlimit);
+ } else {
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit);
+ }
if (opt.txtime.ena) {
__u64 txtime;
@@ -288,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
*(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
SOF_TIMESTAMPING_TX_SOFTWARE;
}
- if (opt.v6.exthdr.ena) {
+ if (opt.cmsg.exthdr.ena) {
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(8);
if (cbuf_sz < cmsg_len)
error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
cmsg->cmsg_level = SOL_IPV6;
- cmsg->cmsg_type = opt.v6.exthdr.val;
+ cmsg->cmsg_type = opt.cmsg.exthdr.val;
cmsg->cmsg_len = CMSG_LEN(8);
*(__u64 *)CMSG_DATA(cmsg) = 0;
}
@@ -396,23 +419,35 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_SOCKET, SO_MARK,
&opt.sockopt.mark, sizeof(opt.sockopt.mark)))
error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
- if (opt.sockopt.dontfrag &&
- setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
- &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
- if (opt.sockopt.tclass &&
- setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
- &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
- if (opt.sockopt.hlimit &&
- setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
- &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
- error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
if (opt.sockopt.priority &&
setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
&opt.sockopt.priority, sizeof(opt.sockopt.priority)))
error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+ if (opt.sock.family == AF_INET) {
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IP, IP_TOS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IP_TOS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IP, IP_TTL,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IP_TTL");
+ } else {
+ if (opt.sockopt.dontfrag &&
+ setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+ &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ }
+
if (opt.txtime.ena) {
struct sock_txtime so_txtime = {
.clockid = CLOCK_MONOTONIC,
@@ -456,7 +491,8 @@ int main(int argc, char *argv[])
if (err) {
fprintf(stderr, "Can't resolve address [%s]:%s\n",
opt.host, opt.service);
- return ERN_SOCK_CREATE;
+ err = ERN_SOCK_CREATE;
+ goto err_free_buff;
}
if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
@@ -465,8 +501,8 @@ int main(int argc, char *argv[])
fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
if (fd < 0) {
fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
- freeaddrinfo(ai);
- return ERN_RESOLVE;
+ err = ERN_RESOLVE;
+ goto err_free_info;
}
if (opt.sock.proto == IPPROTO_ICMP) {
@@ -502,7 +538,7 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
for (i = 0; i < opt.num_pkt; i++) {
- err = sendmsg(fd, &msg, 0);
+ err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0);
if (err < 0) {
if (!opt.silent_send)
fprintf(stderr, "send failed: %s\n", strerror(errno));
@@ -513,6 +549,14 @@ int main(int argc, char *argv[])
err = ERN_SEND_SHORT;
goto err_out;
}
+ if (opt.msg_more) {
+ err = write(fd, NULL, 0);
+ if (err < 0) {
+ fprintf(stderr, "send more: %s\n", strerror(errno));
+ err = ERN_SEND_MORE;
+ goto err_out;
+ }
+ }
}
err = ERN_SUCCESS;
@@ -531,6 +575,9 @@ int main(int argc, char *argv[])
err_out:
close(fd);
+err_free_info:
freeaddrinfo(ai);
+err_free_buff:
+ free(buf);
return err;
}
diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh
new file mode 100755
index 000000000000..ee07d8653262
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_priority.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+readonly KSFT_SKIP=4
+
+IP4=192.0.2.1/24
+TGT4=192.0.2.2
+TGT4_RAW=192.0.2.3
+IP6=2001:db8::1/64
+TGT6=2001:db8::2
+TGT6_RAW=2001:db8::3
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+if ! command -v jq &> /dev/null; then
+ echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2
+ exit "$KSFT_SKIP"
+fi
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+create_filter() {
+ local handle=$1
+ local vlan_prio=$2
+ local ip_type=$3
+ local proto=$4
+ local dst_ip=$5
+ local ip_proto
+
+ if [[ "$proto" == "u" ]]; then
+ ip_proto="udp"
+ elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then
+ ip_proto="icmp"
+ elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then
+ ip_proto="icmpv6"
+ fi
+
+ tc -n $NS filter add dev dummy1 \
+ egress pref 1 handle "$handle" proto 802.1q \
+ flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \
+ dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \
+ action pass
+}
+
+ip -n $NS link set dev lo up
+ip -n $NS link add name dummy1 up type dummy
+
+ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+ip -n $NS address add $IP4 dev dummy1.10
+ip -n $NS address add $IP6 dev dummy1.10 nodad
+
+ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647'
+
+ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \
+ dev dummy1.10
+
+tc -n $NS qdisc add dev dummy1 clsact
+
+FILTER_COUNTER=10
+
+for i in 4 6; do
+ for proto in u i r; do
+ echo "Test IPV$i, prot: $proto"
+ for priority in {0..7}; do
+ if [[ $i == 4 && $proto == "r" ]]; then
+ TGT=$TGT4_RAW
+ elif [[ $i == 6 && $proto == "r" ]]; then
+ TGT=$TGT6_RAW
+ elif [ $i == 4 ]; then
+ TGT=$TGT4
+ else
+ TGT=$TGT6
+ fi
+
+ handle="${FILTER_COUNTER}${priority}"
+
+ create_filter $handle $priority ipv$i $proto $TGT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+
+ if [[ $pkts == 0 ]]; then
+ check_result 0
+ else
+ echo "prio $priority: expected 0, got $pkts"
+ check_result 1
+ fi
+
+ ip netns exec $NS ./cmsg_sender -$i -Q $priority \
+ -p $proto $TGT $PORT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+ if [[ $pkts == 1 ]]; then
+ check_result 0
+ else
+ echo "prio $priority -Q: expected 1, got $pkts"
+ check_result 1
+ fi
+
+ ip netns exec $NS ./cmsg_sender -$i -P $priority \
+ -p $proto $TGT $PORT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+ if [[ $pkts == 2 ]]; then
+ check_result 0
+ else
+ echo "prio $priority -P: expected 2, got $pkts"
+ check_result 1
+ fi
+ done
+ FILTER_COUNTER=$((FILTER_COUNTER + 10))
+ done
+done
+
+if [ $FAILED_TESTS -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit 1
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index 1d7e756644bc..478af0aefa97 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -34,13 +34,28 @@ BAD=0
TOTAL=0
check_result() {
+ local ret=$1
+ local got=$2
+ local exp=$3
+ local case=$4
+ local xfail=$5
+ local xf=
+ local inc=
+
+ if [ "$xfail" == "xfail" ]; then
+ xf="(XFAIL)"
+ inc=0
+ else
+ inc=1
+ fi
+
((TOTAL++))
- if [ $1 -ne 0 ]; then
- echo " Case $4 returned $1, expected 0"
- ((BAD++))
+ if [ $ret -ne 0 ]; then
+ echo " Case $case returned $ret, expected 0 $xf"
+ ((BAD+=inc))
elif [ "$2" != "$3" ]; then
- echo " Case $4 returned '$2', expected '$3'"
- ((BAD++))
+ echo " Case $case returned '$got', expected '$exp' $xf"
+ ((BAD+=inc))
fi
}
@@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+ [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > '$((delay/2))') print "OK";
- else print snd, "-", sch, "<", '$((delay/2))'; }')
- check_result $? "$ts" "OK" "$prot - TXTIME rel"
+ END { if (snd - sch > 500) print "OK";
+ else print snd, "-", sch, "<", 500; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail
done
done
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5b9baf708950..1e1f253118f5 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,109 +1,130 @@
-CONFIG_USER_NS=y
-CONFIG_NET_NS=y
+CONFIG_AMT=m
+CONFIG_BAREUDP=m
CONFIG_BONDING=m
CONFIG_BPF_SYSCALL=y
-CONFIG_TEST_BPF=m
-CONFIG_NUMA=y
-CONFIG_RPS=y
-CONFIG_SYSFS=y
-CONFIG_PROC_SYSCTL=y
-CONFIG_NET_VRF=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_VETH=y
-CONFIG_NET_IPVTI=y
-CONFIG_IPV6_VTI=y
-CONFIG_DUMMY=y
-CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_CAN=m
+CONFIG_CAN_DEV=m
+CONFIG_CAN_VXCAN=m
+CONFIG_CRYPTO_ARIA=y
CONFIG_CRYPTO_CHACHA20POLY1305=m
-CONFIG_VLAN_8021Q=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SM4_GENERIC=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_BTF_MODULES=n
+CONFIG_DUMMY=y
CONFIG_GENEVE=m
CONFIG_IFB=y
CONFIG_INET_DIAG=y
CONFIG_INET_ESP=y
CONFIG_INET_ESP_OFFLOAD=y
-CONFIG_NET_FOU=y
-CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NF_CONNTRACK=m
-CONFIG_IPV6_MROUTE=y
-CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
-CONFIG_NF_NAT=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_IPTABLES=m
-CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MANGLE=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_RAW=m
+CONFIG_IP6_NF_TARGET_REJECT=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
CONFIG_IPV6_GRE=m
+CONFIG_IPV6_ILA=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6_RPL_LWTUNNEL=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_VTI=y
+CONFIG_IPVLAN=m
+CONFIG_KALLSYMS=y
+CONFIG_L2TP=m
CONFIG_L2TP_ETH=m
CONFIG_L2TP_IP=m
-CONFIG_L2TP=m
CONFIG_L2TP_V3=y
CONFIG_MACSEC=m
CONFIG_MACVLAN=y
CONFIG_MACVTAP=y
CONFIG_MPLS=y
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_MPLS_ROUTING=m
CONFIG_MPTCP=y
-CONFIG_NF_TABLES=m
-CONFIG_NF_TABLES_IPV6=y
-CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_NAT=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
CONFIG_NET_CLS_U32=m
-CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NETDEVSIM=m
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_TARGET_HL=m
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPIP=y
+CONFIG_NET_IPVTI=y
+CONFIG_NETKIT=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_HTB=m
-CONFIG_NET_SCH_FQ=m
-CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_NETEM=y
CONFIG_NET_SCH_PRIO=m
-CONFIG_NFT_COMPAT=m
+CONFIG_NET_VRF=y
+CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_OVS=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_NAT=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_NAT=m
+CONFIG_NUMA=y
CONFIG_OPENVSWITCH=m
CONFIG_OPENVSWITCH_GENEVE=m
CONFIG_OPENVSWITCH_GRE=m
CONFIG_OPENVSWITCH_VXLAN=m
+CONFIG_PROC_SYSCTL=y
CONFIG_PSAMPLE=m
+CONFIG_RPS=y
+CONFIG_SYSFS=y
CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
-CONFIG_KALLSYMS=y
+CONFIG_TEST_BPF=m
CONFIG_TLS=m
CONFIG_TRACEPOINTS=y
-CONFIG_NET_DROP_MONITOR=m
-CONFIG_NETDEVSIM=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_NET_ACT_TUNNEL_KEY=m
-CONFIG_NET_ACT_MIRRED=m
-CONFIG_BAREUDP=m
-CONFIG_IPV6_IOAM6_LWTUNNEL=y
-CONFIG_CRYPTO_SM4_GENERIC=y
-CONFIG_AMT=m
CONFIG_TUN=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
CONFIG_VXLAN=m
-CONFIG_IP_SCTP=m
-CONFIG_NETFILTER_XT_MATCH_POLICY=m
-CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
index 16e457c2f877..adf8dd0b5e0b 100644
--- a/tools/testing/selftests/net/epoll_busy_poll.c
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -23,7 +23,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
/* if the headers haven't been updated, we need to define some things */
#if !defined(EPOLL_IOC_TYPE)
diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh
new file mode 100755
index 000000000000..82f9c867c3e8
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-ipv4.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t ipv4
diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh
new file mode 100755
index 000000000000..ab1fc7aa3caf
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-ipv6.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t ipv6
diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh
new file mode 100755
index 000000000000..a840cf80b32e
--- /dev/null
+++ b/tools/testing/selftests/net/fcnal-other.sh
@@ -0,0 +1,2 @@
+#!/bin/sh
+./fcnal-test.sh -t other
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 899dbad0104b..844a580ae74e 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -189,7 +189,7 @@ show_hint()
kill_procs()
{
killall nettest ping ping6 >/dev/null 2>&1
- sleep 1
+ slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"'
}
set_ping_group()
@@ -424,6 +424,8 @@ create_ns()
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0
}
# create veth pair to connect namespaces and apply addresses.
@@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf()
# basic use case
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: Single address config"
@@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Server no config, client uses password"
@@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Client uses wrong password"
@@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Client address does not match address configured with password"
@@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf()
# client in prefix
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: Prefix config"
@@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Prefix config, client uses wrong password"
@@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
}
@@ -943,7 +945,7 @@ ipv4_tcp_md5()
# basic use case
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config"
@@ -951,7 +953,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since server does not have MD5 auth"
run_cmd nettest -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Server no config, client uses password"
@@ -959,7 +961,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Client uses wrong password"
@@ -967,7 +969,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since server config differs from client"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
@@ -978,7 +980,7 @@ ipv4_tcp_md5()
# client in prefix
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config"
@@ -986,7 +988,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
@@ -994,7 +996,7 @@ ipv4_tcp_md5()
log_start
show_hint "Should timeout since client address is outside of prefix"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
@@ -1005,14 +1007,14 @@ ipv4_tcp_md5()
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
@@ -1020,7 +1022,7 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -1028,21 +1030,21 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
@@ -1050,7 +1052,7 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -1058,7 +1060,7 @@ ipv4_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
@@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
log_start
show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
log_start
show_hint "Binding both the socket and the key is not required but it works"
run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
}
@@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0()
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
@@ -1193,7 +1195,7 @@ ipv4_tcp_novrf()
do
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -1201,7 +1203,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1221,13 +1223,13 @@ ipv4_tcp_novrf()
do
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client"
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
@@ -1249,7 +1251,7 @@ ipv4_tcp_novrf()
do
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -0 ${a} -1 ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -1257,7 +1259,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1266,7 +1268,7 @@ ipv4_tcp_novrf()
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
run_cmd nettest -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
done
@@ -1274,7 +1276,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
@@ -1283,7 +1285,7 @@ ipv4_tcp_novrf()
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, device client, local connection"
done
@@ -1291,7 +1293,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -1323,19 +1325,19 @@ ipv4_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 1 "Global server"
log_start
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1352,7 +1354,7 @@ ipv4_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -1374,14 +1376,14 @@ ipv4_tcp_vrf()
log_start
show_hint "client socket should be bound to VRF"
run_cmd nettest -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
show_hint "client socket should be bound to VRF"
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -1396,7 +1398,7 @@ ipv4_tcp_vrf()
log_start
show_hint "client socket should be bound to device"
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1406,7 +1408,7 @@ ipv4_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since client is not bound to VRF"
run_cmd nettest -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
done
@@ -1418,13 +1420,13 @@ ipv4_tcp_vrf()
do
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -d ${VRF}
log_test_addr ${a} $? 0 "Client, VRF bind"
log_start
run_cmd_nsb nettest -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
@@ -1443,7 +1445,7 @@ ipv4_tcp_vrf()
do
log_start
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
done
@@ -1451,26 +1453,26 @@ ipv4_tcp_vrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local connection"
log_start
show_hint "Should fail 'No route to host' since client is out of VRF scope"
run_cmd nettest -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
log_start
run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
}
@@ -1509,7 +1511,7 @@ ipv4_udp_novrf()
do
log_start
run_cmd nettest -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -1522,7 +1524,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1533,31 +1535,31 @@ ipv4_udp_novrf()
do
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client, device bind"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client, device send via cmsg"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP}
log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U
log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()"
@@ -1580,7 +1582,7 @@ ipv4_udp_novrf()
do
log_start
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -0 ${a} -1 ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -1588,7 +1590,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1597,7 +1599,7 @@ ipv4_udp_novrf()
log_start
show_hint "Should fail 'Connection refused' since address is out of device scope"
run_cmd nettest -s -D -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
done
@@ -1605,25 +1607,25 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -C -r ${a}
log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
log_start
run_cmd nettest -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U
log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
@@ -1636,28 +1638,28 @@ ipv4_udp_novrf()
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 2 "Global server, device client, local connection"
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C
log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
log_start
show_hint "Should fail since addresses on loopback are out of device scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
@@ -1667,7 +1669,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -1709,19 +1711,19 @@ ipv4_udp_vrf()
log_start
show_hint "Fails because ingress is in a VRF and global server is disabled"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 1 "Global server"
log_start
run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1733,7 +1735,7 @@ ipv4_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is out of scope"
run_cmd nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 1 "Global server, VRF client, local connection"
done
@@ -1741,26 +1743,26 @@ ipv4_udp_vrf()
a=${NSA_IP}
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection"
a=${NSA_IP}
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1775,19 +1777,19 @@ ipv4_udp_vrf()
do
log_start
run_cmd nettest -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1802,13 +1804,13 @@ ipv4_udp_vrf()
#
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP}
log_test $? 0 "VRF client"
log_start
run_cmd_nsb nettest -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP}
log_test $? 0 "Enslaved device client"
@@ -1829,31 +1831,31 @@ ipv4_udp_vrf()
a=${NSA_IP}
log_start
run_cmd nettest -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1861,7 +1863,7 @@ ipv4_udp_vrf()
do
log_start
run_cmd nettest -D -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
done
@@ -1870,7 +1872,7 @@ ipv4_udp_vrf()
do
log_start
run_cmd nettest -s -D -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
done
@@ -2093,7 +2095,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2107,7 +2109,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2120,7 +2122,7 @@ ipv4_rt()
a=${NSA_IP}
log_start
run_cmd nettest ${varg} -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2134,7 +2136,7 @@ ipv4_rt()
#
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2145,7 +2147,7 @@ ipv4_rt()
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2161,7 +2163,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2175,7 +2177,7 @@ ipv4_rt()
do
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2189,7 +2191,7 @@ ipv4_rt()
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2200,7 +2202,7 @@ ipv4_rt()
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2211,7 +2213,7 @@ ipv4_rt()
log_start
run_cmd nettest ${varg} -I ${NSA_DEV} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -2561,7 +2563,7 @@ ipv6_tcp_md5_novrf()
# basic use case
log_start
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: Single address config"
@@ -2569,7 +2571,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Server no config, client uses password"
@@ -2577,7 +2579,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Client uses wrong password"
@@ -2585,7 +2587,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Client address does not match address configured with password"
@@ -2596,7 +2598,7 @@ ipv6_tcp_md5_novrf()
# client in prefix
log_start
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: Prefix config"
@@ -2604,7 +2606,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Prefix config, client uses wrong password"
@@ -2612,7 +2614,7 @@ ipv6_tcp_md5_novrf()
log_start
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
}
@@ -2629,7 +2631,7 @@ ipv6_tcp_md5()
# basic use case
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config"
@@ -2637,7 +2639,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since server does not have MD5 auth"
run_cmd nettest -6 -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Server no config, client uses password"
@@ -2645,7 +2647,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Client uses wrong password"
@@ -2653,7 +2655,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since server config differs from client"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
@@ -2664,7 +2666,7 @@ ipv6_tcp_md5()
# client in prefix
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config"
@@ -2672,7 +2674,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
@@ -2680,7 +2682,7 @@ ipv6_tcp_md5()
log_start
show_hint "Should timeout since client address is outside of prefix"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
@@ -2691,14 +2693,14 @@ ipv6_tcp_md5()
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
@@ -2706,7 +2708,7 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -2714,21 +2716,21 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
log_start
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
@@ -2736,7 +2738,7 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in default VRF uses VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
@@ -2744,7 +2746,7 @@ ipv6_tcp_md5()
show_hint "Should timeout since client in VRF uses default VRF password"
run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
@@ -2772,7 +2774,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -2793,7 +2795,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Client"
done
@@ -2802,7 +2804,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
done
@@ -2822,7 +2824,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -2830,7 +2832,7 @@ ipv6_tcp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -2839,7 +2841,7 @@ ipv6_tcp_novrf()
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
run_cmd nettest -6 -s -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
done
@@ -2847,7 +2849,7 @@ ipv6_tcp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
@@ -2856,7 +2858,7 @@ ipv6_tcp_novrf()
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, device client, local connection"
done
@@ -2865,7 +2867,7 @@ ipv6_tcp_novrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
done
@@ -2898,7 +2900,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server"
done
@@ -2907,7 +2909,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -2916,7 +2918,7 @@ ipv6_tcp_vrf()
a=${NSA_LINKIP6}%${NSB_DEV}
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2924,7 +2926,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Device server"
done
@@ -2943,7 +2945,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server with VRF is disabled"
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -2964,7 +2966,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -2973,7 +2975,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -2982,13 +2984,13 @@ ipv6_tcp_vrf()
a=${NSA_LINKIP6}%${NSB_DEV}
log_start
run_cmd nettest -6 -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2996,7 +2998,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Device server"
done
@@ -3016,7 +3018,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Fails 'Connection refused' since client is not in VRF"
run_cmd nettest -6 -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
done
@@ -3029,7 +3031,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF}
log_test_addr ${a} $? 0 "Client, VRF bind"
done
@@ -3038,7 +3040,7 @@ ipv6_tcp_vrf()
log_start
show_hint "Fails since VRF device does not allow linklocal addresses"
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF}
log_test_addr ${a} $? 1 "Client, VRF bind"
@@ -3046,7 +3048,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd_nsb nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 0 "Client, device bind"
done
@@ -3071,7 +3073,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
done
@@ -3079,7 +3081,7 @@ ipv6_tcp_vrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local connection"
@@ -3087,13 +3089,13 @@ ipv6_tcp_vrf()
log_start
show_hint "Should fail since unbound client is out of VRF scope"
run_cmd nettest -6 -s -I ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
@@ -3101,7 +3103,7 @@ ipv6_tcp_vrf()
do
log_start
run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
done
@@ -3141,13 +3143,13 @@ ipv6_udp_novrf()
do
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Device server"
done
@@ -3155,7 +3157,7 @@ ipv6_udp_novrf()
a=${NSA_LO_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -3165,7 +3167,7 @@ ipv6_udp_novrf()
#log_start
#show_hint "Should fail since loopback address is out of scope"
#run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- #sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
#run_cmd_nsb nettest -6 -D -r ${a}
#log_test_addr ${a} $? 1 "Device server"
@@ -3185,25 +3187,25 @@ ipv6_udp_novrf()
do
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client"
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client, device bind"
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client, device send via cmsg"
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6}
log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF"
@@ -3225,7 +3227,7 @@ ipv6_udp_novrf()
do
log_start
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a}
log_test_addr ${a} $? 0 "Global server, local connection"
done
@@ -3233,7 +3235,7 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -3242,7 +3244,7 @@ ipv6_udp_novrf()
log_start
show_hint "Should fail 'Connection refused' since address is out of device scope"
run_cmd nettest -6 -s -D -I ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a}
log_test_addr ${a} $? 1 "Device server, local connection"
done
@@ -3250,19 +3252,19 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local connection"
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a}
log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection"
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a}
log_test_addr ${a} $? 0 "Global server, device client via IPV6_UNICAST_IF, local connection"
@@ -3271,28 +3273,28 @@ ipv6_udp_novrf()
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV}
log_test_addr ${a} $? 1 "Global server, device client, local connection"
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C
log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection"
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
log_start
show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
done
@@ -3300,7 +3302,7 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -3314,7 +3316,7 @@ ipv6_udp_novrf()
run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
log_test $? 0 "UDP in - LLA to GUA"
@@ -3338,7 +3340,7 @@ ipv6_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is disabled"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 1 "Global server"
done
@@ -3347,7 +3349,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -3356,7 +3358,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
done
@@ -3378,7 +3380,7 @@ ipv6_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is disabled"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 1 "Global server, VRF client, local conn"
done
@@ -3387,7 +3389,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
done
@@ -3396,25 +3398,25 @@ ipv6_udp_vrf()
log_start
show_hint "Should fail 'Connection refused' since global server is disabled"
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 1 "Global server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -3429,7 +3431,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
done
@@ -3438,7 +3440,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
done
@@ -3447,7 +3449,7 @@ ipv6_udp_vrf()
do
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
done
@@ -3465,7 +3467,7 @@ ipv6_udp_vrf()
#
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6}
log_test $? 0 "VRF client"
@@ -3476,7 +3478,7 @@ ipv6_udp_vrf()
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6}
log_test $? 0 "Enslaved device client"
@@ -3491,13 +3493,13 @@ ipv6_udp_vrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
#log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3505,13 +3507,13 @@ ipv6_udp_vrf()
a=${VRF_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3527,25 +3529,25 @@ ipv6_udp_vrf()
a=${NSA_IP6}
log_start
run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local conn"
log_start
run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -3557,7 +3559,7 @@ ipv6_udp_vrf()
# link local addresses
log_start
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6}
log_test $? 0 "Global server, linklocal IP"
@@ -3568,7 +3570,7 @@ ipv6_udp_vrf()
log_start
run_cmd_nsb nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6}
log_test $? 0 "Enslaved device client, linklocal IP"
@@ -3579,7 +3581,7 @@ ipv6_udp_vrf()
log_start
run_cmd nettest -6 -D -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6}
log_test $? 0 "Enslaved device client, local conn - linklocal IP"
@@ -3592,7 +3594,7 @@ ipv6_udp_vrf()
run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV}
log_start
run_cmd nettest -6 -s -D &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 udp
run_cmd_nsb nettest -6 -D -r ${NSA_IP6}
log_test $? 0 "UDP in - LLA to GUA"
@@ -3667,7 +3669,7 @@ ipv6_addr_bind_novrf()
# when it really should not
a=${NSA_LO_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
}
@@ -3724,7 +3726,7 @@ ipv6_addr_bind_vrf()
# passes when it really should not
a=${VRF_IP6}
log_start
- show_hint "Tecnically should fail since address is not on device but kernel allows"
+ show_hint "Technically should fail since address is not on device but kernel allows"
run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
@@ -3771,7 +3773,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3785,7 +3787,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3799,7 +3801,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -I ${NSA_DEV} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3814,7 +3816,7 @@ ipv6_rt()
#
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3825,7 +3827,7 @@ ipv6_rt()
log_start
run_cmd_nsb nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSB} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3842,7 +3844,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3856,7 +3858,7 @@ ipv6_rt()
do
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3869,7 +3871,7 @@ ipv6_rt()
a=${NSA_IP6}
log_start
run_cmd nettest ${varg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3880,7 +3882,7 @@ ipv6_rt()
log_start
run_cmd nettest ${varg} -I ${VRF} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3891,7 +3893,7 @@ ipv6_rt()
log_start
run_cmd nettest ${varg} -I ${NSA_DEV} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
run_cmd ip link del ${VRF}
@@ -3950,7 +3952,7 @@ netfilter_tcp_reset()
do
log_start
run_cmd nettest -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
done
@@ -3968,7 +3970,7 @@ netfilter_icmp()
do
log_start
run_cmd nettest ${arg} -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest ${arg} -r ${a}
log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
done
@@ -4007,7 +4009,7 @@ netfilter_tcp6_reset()
do
log_start
run_cmd nettest -6 -s &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx"
done
@@ -4025,7 +4027,7 @@ netfilter_icmp6()
do
log_start
run_cmd nettest -6 -s ${arg} &
- sleep 1
+ wait_local_port_listen ${NSA} 12345 tcp
run_cmd_nsb nettest -6 ${arg} -r ${a}
log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach"
done
@@ -4221,12 +4223,12 @@ use_case_snat_on_vrf()
run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
- sleep 1
+ wait_local_port_listen ${NSB} ${port} tcp
run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
- sleep 1
+ wait_local_port_listen ${NSB} ${port} tcp
run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
@@ -4272,6 +4274,7 @@ EOF
TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
TESTS_OTHER="use_cases"
+# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh
PAUSE_ON_FAIL=no
PAUSE=no
@@ -4302,6 +4305,8 @@ elif [ "$TESTS" = "ipv4" ]; then
TESTS="$TESTS_IPV4"
elif [ "$TESTS" = "ipv6" ]; then
TESTS="$TESTS_IPV6"
+elif [ "$TESTS" = "other" ]; then
+ TESTS="$TESTS_OTHER"
fi
check_gen_prog "nettest"
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
index d5e3abb8658c..9931a1e36e3d 100755
--- a/tools/testing/selftests/net/fdb_flush.sh
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes()
$IP link del dev vx10
$IP link add name vx10 type vxlan dstport "$VXPORT" external
- # For multicat FDB entries, the VXLAN driver stores a linked list of
+ # For multicast FDB entries, the VXLAN driver stores a linked list of
# remotes for a given key. Verify that only the expected remotes are
# flushed.
multicast_fdb_entries_add
diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh
index c03151e7791c..0b8a2465dd04 100755
--- a/tools/testing/selftests/net/fdb_notify.sh
+++ b/tools/testing/selftests/net/fdb_notify.sh
@@ -40,16 +40,16 @@ do_test_dup()
test_dup_bridge()
{
- ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add br up type bridge vlan_filtering 1
do_test_dup add "bridge" dev br self
do_test_dup del "bridge" dev br self
}
test_dup_vxlan_self()
{
- ip_link_add br up type bridge vlan_filtering 1
- ip_link_add vx up type vxlan id 2000 dstport 4789
- ip_link_master vx br
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+ adf_ip_link_set_master vx br
do_test_dup add "vxlan" dev vx self dst 192.0.2.1
do_test_dup del "vxlan" dev vx self dst 192.0.2.1
@@ -57,9 +57,9 @@ test_dup_vxlan_self()
test_dup_vxlan_master()
{
- ip_link_add br up type bridge vlan_filtering 1
- ip_link_add vx up type vxlan id 2000 dstport 4789
- ip_link_master vx br
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+ adf_ip_link_set_master vx br
do_test_dup add "vxlan master" dev vx master
do_test_dup del "vxlan master" dev vx master
@@ -67,8 +67,8 @@ test_dup_vxlan_master()
test_dup_macvlan_self()
{
- ip_link_add dd up type dummy
- ip_link_add mv up link dd type macvlan mode passthru
+ adf_ip_link_add dd up type dummy
+ adf_ip_link_add mv up link dd type macvlan mode passthru
do_test_dup add "macvlan self" dev mv self
do_test_dup del "macvlan self" dev mv self
@@ -76,10 +76,10 @@ test_dup_macvlan_self()
test_dup_macvlan_master()
{
- ip_link_add br up type bridge vlan_filtering 1
- ip_link_add dd up type dummy
- ip_link_add mv up link dd type macvlan mode passthru
- ip_link_master mv br
+ adf_ip_link_add br up type bridge vlan_filtering 1
+ adf_ip_link_add dd up type dummy
+ adf_ip_link_add mv up link dd type macvlan mode passthru
+ adf_ip_link_set_master mv br
do_test_dup add "macvlan master" dev mv self
do_test_dup del "macvlan master" dev mv self
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 77c83d9508d3..2b0a90581e2f 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -76,11 +76,13 @@ log_test()
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
else
- ret=1
- nfail=$((nfail+1))
if [[ $rc -eq $ksft_skip ]]; then
+ [[ $ret -eq 0 ]] && ret=$ksft_skip
+ nskip=$((nskip+1))
printf "TEST: %-60s [SKIP]\n" "${msg}"
else
+ ret=1
+ nfail=$((nfail+1))
printf "TEST: %-60s [FAIL]\n" "${msg}"
fi
@@ -465,8 +467,8 @@ ipv6_fdb_grp_fcnal()
log_test $? 0 "Get Fdb nexthop group by id"
# fdb nexthop group can only contain fdb nexthops
- run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
- run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1"
+ run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1"
run_cmd "$IP nexthop add id 103 group 63/64 fdb"
log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
@@ -492,6 +494,26 @@ ipv6_fdb_grp_fcnal()
run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
log_test $? 2 "Fdb Nexthop with encap"
+ # Replace FDB nexthop to non-FDB and vice versa
+ run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+ log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb"
+ log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+ # Replace FDB nexthop address while in a group
+ run_cmd "$IP nexthop add id 71 group 70 fdb"
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb"
+ log_test $? 0 "Replace FDB nexthop address while in a group"
+
+ # Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+ run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1"
+ log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+ run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 73 group 72"
+ run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb"
+ log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
log_test $? 0 "Fdb mac add with nexthop group"
@@ -545,15 +567,15 @@ ipv4_fdb_grp_fcnal()
log_test $? 0 "Get Fdb nexthop group by id"
# fdb nexthop group can only contain fdb nexthops
- run_cmd "$IP nexthop add id 14 via 172.16.1.2"
- run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+ run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1"
run_cmd "$IP nexthop add id 103 group 14/15 fdb"
log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
# Non fdb nexthop group can not contain fdb nexthops
run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
- run_cmd "$IP nexthop add id 104 group 14/15"
+ run_cmd "$IP nexthop add id 104 group 16/17"
log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
# fdb nexthop cannot have blackhole
@@ -572,6 +594,26 @@ ipv4_fdb_grp_fcnal()
run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
log_test $? 2 "Fdb Nexthop with encap"
+ # Replace FDB nexthop to non-FDB and vice versa
+ run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+ log_test $? 0 "Replace FDB nexthop to non-FDB nexthop"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb"
+ log_test $? 0 "Replace non-FDB nexthop to FDB nexthop"
+
+ # Replace FDB nexthop address while in a group
+ run_cmd "$IP nexthop add id 19 group 18 fdb"
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb"
+ log_test $? 0 "Replace FDB nexthop address while in a group"
+
+ # Cannot replace FDB nexthop to non-FDB and vice versa while in a group
+ run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1"
+ log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group"
+ run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 21 group 20"
+ run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb"
+ log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group"
+
run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
log_test $? 0 "Fdb mac add with nexthop group"
@@ -580,7 +622,7 @@ ipv4_fdb_grp_fcnal()
run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
log_test $? 255 "Fdb mac add with nexthop"
- run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+ run_cmd "$IP ro add 172.16.0.0/22 nhid 16"
log_test $? 2 "Route add with fdb nexthop"
run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
@@ -741,7 +783,7 @@ ipv6_fcnal()
run_cmd "$IP nexthop add id 52 via 2001:db8:92::3"
log_test $? 2 "Create nexthop - gw only"
- # gw is not reachable throught given dev
+ # gw is not reachable through given dev
run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1"
log_test $? 2 "Create nexthop - invalid gw+dev combination"
@@ -2528,6 +2570,7 @@ done
if [ "$TESTS" != "none" ]; then
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+ printf "Tests skipped: %2d\n" ${nskip}
fi
exit $ret
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 1d58b3b87465..5fbdd2a0b537 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -256,6 +256,24 @@ fib_rule6_test()
fib_rule6_test_match_n_redirect "$match" "$match" \
"$getnomatch" "sport and dport redirect to table" \
"sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
@@ -291,6 +309,73 @@ fib_rule6_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
+
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP6 iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
+
+ fib_check_iproute_support "flowlabel" "flowlabel"
+ if [ $? -eq 0 ]; then
+ match="flowlabel 0xfffff"
+ getmatch="flowlabel 0xfffff"
+ getnomatch="flowlabel 0xf"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "flowlabel redirect to table" \
+ "flowlabel no redirect to table"
+
+ match="flowlabel 0xfffff"
+ getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+ getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif flowlabel redirect to table" \
+ "iif flowlabel no redirect to table"
+
+ match="flowlabel 0x08000/0x08000"
+ getmatch="flowlabel 0xfffff"
+ getnomatch="flowlabel 0xf7fff"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "flowlabel masked redirect to table" \
+ "flowlabel masked no redirect to table"
+
+ match="flowlabel 0x08000/0x08000"
+ getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+ getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif flowlabel masked redirect to table" \
+ "iif flowlabel masked no redirect to table"
+ fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP6 iif vrf0"
+ getmatch="from $SRC_IP6 iif $DEV"
+ getnomatch="from $SRC_IP6 iif lo"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
+ fi
}
fib_rule6_vrf_test()
@@ -431,10 +516,7 @@ fib_rule4_test()
fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
"oif redirect to table" "oif no redirect to table"
- # Enable forwarding and disable rp_filter as all the addresses are in
- # the same subnet and egress device == ingress device.
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
- ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
getnomatch="from $SRC_IP iif lo"
fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \
@@ -494,6 +576,24 @@ fib_rule4_test()
fib_rule4_test_match_n_redirect "$match" "$match" \
"$getnomatch" "sport and dport redirect to table" \
"sport and dport no redirect to table"
+
+ match="sport 100-200 dport 300-400"
+ getmatch="sport 100 dport 400"
+ getnomatch="sport 100 dport 401"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" \
+ "sport and dport range redirect to table" \
+ "sport and dport range no redirect to table"
+ fi
+
+ ip rule help 2>&1 | grep sport | grep -q MASK
+ if [ $? -eq 0 ]; then
+ match="sport 0x0f00/0xff00 dport 0x000f/0x00ff"
+ getmatch="sport 0x0f11 dport 0x220f"
+ getnomatch="sport 0x1f11 dport 0x221f"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "sport and dport masked redirect to table" \
+ "sport and dport masked no redirect to table"
fi
fib_check_iproute_support "ipproto" "ipproto"
@@ -530,6 +630,42 @@ fib_rule4_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
+
+ ip rule help 2>&1 | grep -q "DSCP\[/MASK\]"
+ if [ $? -eq 0 ]; then
+ match="dscp 0x0f/0x0f"
+ tosmatch=$(printf 0x"%x" $((0x1f << 2)))
+ tosnomatch=$(printf 0x"%x" $((0x1e << 2)))
+ getmatch="tos $tosmatch"
+ getnomatch="tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "dscp masked redirect to table" \
+ "dscp masked no redirect to table"
+
+ match="dscp 0x0f/0x0f"
+ getmatch="from $SRC_IP iif $DEV tos $tosmatch"
+ getnomatch="from $SRC_IP iif $DEV tos $tosnomatch"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif dscp masked redirect to table" \
+ "iif dscp masked no redirect to table"
+ fi
+
+ $IP link show dev $DEV | grep -q vrf0
+ if [ $? -eq 0 ]; then
+ match="oif vrf0"
+ getmatch="oif $DEV"
+ getnomatch="oif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF oif redirect to table" \
+ "VRF oif no redirect to table"
+
+ match="from $SRC_IP iif vrf0"
+ getmatch="from $SRC_IP iif $DEV"
+ getnomatch="from $SRC_IP iif lo"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "VRF iif redirect to table" \
+ "VRF iif no redirect to table"
+ fi
}
fib_rule4_vrf_test()
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 3ea6f886a210..a88f797c549a 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
- ipv4_mpath_list ipv6_mpath_list"
+ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \
+ fib6_ra_to_static"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1085,6 +1086,35 @@ route_setup()
set +e
}
+forwarding_cleanup()
+{
+ cleanup_ns $ns3
+
+ route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+ forwarding_cleanup
+
+ route_setup
+
+ setup_ns ns3
+
+ ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+ ip -netns $ns3 link set veth5 up
+ ip -netns $ns2 link set veth6 up
+
+ ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+ ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+ ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+ ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+ ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+ ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
# assumption is that basic add of a single path route works
# otherwise just adding an address on an interface is broken
ipv6_rt_add()
@@ -1447,6 +1477,68 @@ ipv6_route_metrics_test()
route_cleanup
}
+fib6_ra_to_static()
+{
+ setup
+
+ echo
+ echo "Fib6 route promotion from RA-learned to static test"
+ set -e
+
+ # ra6 is required for the test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
+ fi
+
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a prefix from veth2.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. Make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Configure static address on the same prefix
+ $IP -6 address add 2001:12::dead/64 dev veth1 nodad
+
+ # On-link route won't expire anymore, default route still owned by RA
+ check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+ # Send a second RA message with a prefix from veth2.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60
+ sleep 1
+
+ # Expire is not back, on-link route is still static
+ check_rt_num 1 $($IP -6 route list |grep expires|wc -l)
+
+ $IP -6 address del 2001:12::dead/64 dev veth1 nodad
+
+ # Expire is back, on-link route is now owned by RA again
+ check_rt_num 2 $($IP -6 route list |grep expires|wc -l)
+
+ log_test $ret 0 "ipv6 promote RA route to static"
+
+ set +e
+
+ cleanup &> /dev/null
+}
+
# add route for a prefix, flushing any existing routes first
# expected to be the first step of a test
add_route()
@@ -2531,9 +2623,6 @@ ipv4_mpath_list_test()
run_cmd "ip -n $ns2 route add 203.0.113.0/24
nexthop via 172.16.201.2 nexthop via 172.16.202.2"
run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
- run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
set +e
local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
@@ -2600,6 +2689,93 @@ ipv6_mpath_list_test()
route_cleanup
}
+tc_set_flower_counter__saddr_syn() {
+ tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+ip_mpath_balance_dep_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+}
+
+ip_mpath_balance() {
+ local -r ipver=$1
+ local -r daddr=$2
+ local -r num_conn=20
+
+ for i in $(seq 1 $num_conn); do
+ ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+ sleep 0.02
+ echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+ done
+
+ local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+ local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+ local -r syns=$((syn0+syn1))
+
+ [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+ [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+ echo
+ echo "IPv4 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 172.16.105.1 \
+ nexthop via 172.16.101.2 \
+ nexthop via 172.16.103.2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+ tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+
+ ip_mpath_balance -4 172.16.105.1
+
+ log_test $? 0 "IPv4 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+ echo
+ echo "IPv6 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 2001:db8:105::1\
+ nexthop via 2001:db8:101::2 \
+ nexthop via 2001:db8:103::2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+ tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+
+ ip_mpath_balance -6 "[2001:db8:105::1]"
+
+ log_test $? 0 "IPv6 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
################################################################################
# usage
@@ -2683,6 +2859,9 @@ do
fib6_gc_test|ipv6_gc) fib6_gc_test;;
ipv4_mpath_list) ipv4_mpath_list_test;;
ipv6_mpath_list) ipv6_mpath_list_test;;
+ ipv4_mpath_balance) ipv4_mpath_balance_test;;
+ ipv6_mpath_balance) ipv6_mpath_balance_test;;
+ fib6_ra_to_static) fib6_ra_to_static;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 7d885cff8d79..ff4a00d91a26 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,6 +1,9 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = bridge_fdb_learning_limit.sh \
+TEST_PROGS := \
+ bridge_activity_notify.sh \
+ bridge_fdb_learning_limit.sh \
+ bridge_fdb_local_vlan_0.sh \
bridge_igmp.sh \
bridge_locked_port.sh \
bridge_mdb.sh \
@@ -18,64 +21,64 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
- gre_multipath_nh_res.sh \
- gre_multipath_nh.sh \
gre_multipath.sh \
+ gre_multipath_nh.sh \
+ gre_multipath_nh_res.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
+ ip6gre_flat.sh \
ip6gre_flat_key.sh \
ip6gre_flat_keys.sh \
- ip6gre_flat.sh \
+ ip6gre_hier.sh \
ip6gre_hier_key.sh \
ip6gre_hier_keys.sh \
- ip6gre_hier.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
+ ipip_flat_gre.sh \
ipip_flat_gre_key.sh \
ipip_flat_gre_keys.sh \
- ipip_flat_gre.sh \
+ ipip_hier_gre.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
- ipip_hier_gre.sh \
lib_sh_test.sh \
local_termination.sh \
min_max_mtu.sh \
+ mirror_gre.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
- mirror_gre_bridge_1q_lag.sh \
mirror_gre_bridge_1q.sh \
+ mirror_gre_bridge_1q_lag.sh \
mirror_gre_changes.sh \
mirror_gre_flower.sh \
mirror_gre_lag_lacp.sh \
mirror_gre_neigh.sh \
mirror_gre_nh.sh \
- mirror_gre.sh \
- mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
+ mirror_gre_vlan_bridge_1q.sh \
mirror_vlan.sh \
no_forwarding.sh \
pedit_dsfield.sh \
pedit_ip.sh \
pedit_l4port.sh \
- q_in_vni_ipv6.sh \
q_in_vni.sh \
+ q_in_vni_ipv6.sh \
+ router.sh \
router_bridge.sh \
router_bridge_1d.sh \
router_bridge_1d_lag.sh \
router_bridge_lag.sh \
+ router_bridge_pvid_vlan_upper.sh \
router_bridge_vlan.sh \
router_bridge_vlan_upper.sh \
- router_bridge_pvid_vlan_upper.sh \
router_bridge_vlan_upper_pvid.sh \
router_broadcast.sh \
- router_mpath_nh_res.sh \
router_mpath_nh.sh \
+ router_mpath_nh_res.sh \
router_mpath_seed.sh \
router_multicast.sh \
router_multipath.sh \
router_nh.sh \
- router.sh \
router_vid_1.sh \
sch_ets.sh \
sch_red.sh \
@@ -85,30 +88,34 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
skbedit_priority.sh \
tc_actions.sh \
tc_chains.sh \
- tc_flower_router.sh \
tc_flower.sh \
- tc_flower_l2_miss.sh \
tc_flower_cfm.sh \
+ tc_flower_l2_miss.sh \
tc_flower_port_range.sh \
+ tc_flower_router.sh \
tc_mpls_l2vpn.sh \
tc_police.sh \
tc_shblocks.sh \
tc_tunnel_key.sh \
tc_vlan_modify.sh \
- vxlan_asymmetric_ipv6.sh \
vxlan_asymmetric.sh \
+ vxlan_asymmetric_ipv6.sh \
+ vxlan_bridge_1d.sh \
vxlan_bridge_1d_ipv6.sh \
- vxlan_bridge_1d_port_8472_ipv6.sh \
vxlan_bridge_1d_port_8472.sh \
- vxlan_bridge_1d.sh \
+ vxlan_bridge_1d_port_8472_ipv6.sh \
+ vxlan_bridge_1q.sh \
vxlan_bridge_1q_ipv6.sh \
- vxlan_bridge_1q_port_8472_ipv6.sh \
+ vxlan_bridge_1q_mc_ul.sh \
vxlan_bridge_1q_port_8472.sh \
- vxlan_bridge_1q.sh \
+ vxlan_bridge_1q_port_8472_ipv6.sh \
+ vxlan_reserved.sh \
+ vxlan_symmetric.sh \
vxlan_symmetric_ipv6.sh \
- vxlan_symmetric.sh
+# end of TEST_PROGS
-TEST_FILES := devlink_lib.sh \
+TEST_FILES := \
+ devlink_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
@@ -123,10 +130,12 @@ TEST_FILES := devlink_lib.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
- tc_common.sh
+ tc_common.sh \
+# end of TEST_FILES
TEST_INCLUDES := \
+ $(wildcard ../lib/sh/*.sh) \
../lib.sh \
- $(wildcard ../lib/sh/*.sh)
+# end of TEST_INCLUDES
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index a652429bfd53..392a5a91ed37 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -6,7 +6,7 @@ to easily create and test complex environments.
Unfortunately, these namespaces can not be used with actual switching
ASICs, as their ports can not be migrated to other network namespaces
-(dev->netns_local) and most of them probably do not support the
+(dev->netns_immutable) and most of them probably do not support the
L1-separation provided by namespaces.
However, a similar kind of flexibility can be achieved by using VRFs and
@@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission.
1. https://www.shellcheck.net/
+Cleanups
+--------
+
+o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider
+ making use of the defer primitive to schedule automatic cleanups. This
+ makes it harder to forget to remove a temporary netdevice, kill a running
+ process or perform other cleanup when the test script is interrupted.
+
+o When adding a helper that dirties the environment, but schedules all
+ necessary cleanups through defer, consider prefixing it adf_ for
+ consistency with lib.sh and ../lib.sh helpers. This serves as an
+ immediately visible bit of documentation about the helper API.
+
+o Definitely do the above for any new code in lib.sh, if practical.
+
Customization
=============
diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
new file mode 100755
index 000000000000..522a5b1b046c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh
@@ -0,0 +1,170 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | 192.0.2.1/28 | | 192.0.2.2/28 |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1d) + $swp2 | |
+# | | | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ new_inactive_test
+ existing_active_test
+ norefresh_test
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ adf_simple_if_init "$h1" 192.0.2.1/28
+}
+
+h2_create()
+{
+ adf_simple_if_init "$h2" 192.0.2.2/28
+}
+
+switch_create()
+{
+ adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \
+ ageing_time "$LOW_AGEING_TIME"
+ adf_ip_link_set_up br1
+
+ adf_ip_link_set_master "$swp1" br1
+ adf_ip_link_set_up "$swp1"
+
+ adf_ip_link_set_master "$swp2" br1
+ adf_ip_link_set_up "$swp2"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ adf_vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+fdb_active_wait()
+{
+ local mac=$1; shift
+
+ bridge -d fdb get "$mac" br br1 | grep -q -v "inactive"
+}
+
+fdb_inactive_wait()
+{
+ local mac=$1; shift
+
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+}
+
+new_inactive_test()
+{
+ local mac="00:11:22:33:44:55"
+
+ # Add a new FDB entry as static and inactive and check that it
+ # becomes active upon traffic.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+ check_err $? "FDB entry not present as \"inactive\" when should"
+
+ $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q
+
+ busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac"
+ check_err $? "FDB entry present as \"inactive\" when should not"
+
+ log_test "Transition from inactive to active"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+existing_active_test()
+{
+ local mac="00:11:22:33:44:55"
+ local ageing_time
+
+ # Enable activity notifications on an existing dynamic FDB entry and
+ # check that it becomes inactive after the ageing time passed.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master dynamic
+ bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh
+
+ bridge -d fdb get "$mac" br br1 | grep -q "activity_notify"
+ check_err $? "FDB entry not present as \"activity_notify\" when should"
+
+ bridge -d fdb get "$mac" br br1 | grep -q "inactive"
+ check_fail $? "FDB entry present as \"inactive\" when should not"
+
+ ageing_time=$(bridge_ageing_time_get br1)
+ slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac"
+ check_err $? "FDB entry not present as \"inactive\" when should"
+
+ log_test "Transition from active to inactive"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+norefresh_test()
+{
+ local mac="00:11:22:33:44:55"
+ local updated_time
+
+ # Check that the "updated" time is reset when replacing an FDB entry
+ # without the "norefresh" keyword and that it is not reset when
+ # replacing with the "norefresh" keyword.
+ RET=0
+
+ bridge fdb add "$mac" dev "$swp1" master static
+ sleep 1
+
+ bridge fdb replace "$mac" dev "$swp1" master static activity_notify
+ updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+ if [[ $updated_time -ne 0 ]]; then
+ check_err 1 "\"updated\" time was not reset when should"
+ fi
+
+ sleep 1
+ bridge fdb replace "$mac" dev "$swp1" master static norefresh
+ updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]')
+ if [[ $updated_time -eq 0 ]]; then
+ check_err 1 "\"updated\" time was reset when should not"
+ fi
+
+ log_test "Resetting of \"updated\" time"
+
+ bridge fdb del "$mac" dev "$swp1" master
+}
+
+if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then
+ echo "SKIP: iproute2 too old, missing bridge FDB activity notification control"
+ exit "$ksft_skip"
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh
new file mode 100755
index 000000000000..694de8ba97e4
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh
@@ -0,0 +1,387 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +-----------------------+ +-----------------------+
+# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) |
+# | + $h1 | | + $h2 | | + $h3 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 |
+# | | | | | | | | |
+# +----|------------------+ +----|------------------+ +----|------------------+
+# | | |
+# +----|-------------------------|-------------------------|------------------+
+# | +--|-------------------------|------------------+ | |
+# | | + $swp1 + $swp2 | + $swp3 |
+# | | | 192.0.2.17/28 |
+# | | BR1 (802.1q) | 2001:db8:2::1/64 |
+# | | 192.0.2.3/28 | |
+# | | 2001:db8:1::3/64 | |
+# | +-----------------------------------------------+ SW |
+# +---------------------------------------------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+#shellcheck disable=SC2034 # ... and global variables
+
+ALL_TESTS="
+ test_d_no_sharing
+ test_d_sharing
+ test_q_no_sharing
+ test_q_sharing
+ test_addr_set
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+pMAC=00:11:22:33:44:55
+bMAC=00:11:22:33:44:66
+mMAC=00:11:22:33:44:77
+xMAC=00:11:22:33:44:88
+
+host_create()
+{
+ local h=$1; shift
+ local ipv4=$1; shift
+ local ipv6=$1; shift
+
+ adf_simple_if_init "$h" "$ipv4" "$ipv6"
+ adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3
+ adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3
+}
+
+h3_create()
+{
+ adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64
+ adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17
+ adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1
+
+ tc qdisc add dev "$h3" clsact
+ defer tc qdisc del dev "$h3" clsact
+
+ tc filter add dev "$h3" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port 4096 \
+ action pass
+ defer tc filter del dev "$h3" ingress proto ip pref 104
+
+ tc qdisc add dev "$h2" clsact
+ defer tc qdisc del dev "$h2" clsact
+
+ tc filter add dev "$h2" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port 4096 \
+ action pass
+ defer tc filter del dev "$h2" ingress proto ip pref 104
+}
+
+switch_create()
+{
+ adf_ip_link_set_up "$swp1"
+
+ adf_ip_link_set_up "$swp2"
+
+ adf_ip_addr_add "$swp3" 192.0.2.17/28
+ adf_ip_addr_add "$swp3" 2001:db8:2::1/64
+ adf_ip_link_set_up "$swp3"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ adf_vrf_prepare
+ adf_forwarding_enable
+
+ host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64
+ host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64
+ h3_create
+
+ switch_create
+}
+
+adf_bridge_configure()
+{
+ local dev
+
+ adf_ip_addr_add br 192.0.2.3/28
+ adf_ip_addr_add br 2001:db8:1::3/64
+
+ adf_bridge_vlan_add dev br vid 1 pvid untagged self
+ adf_bridge_vlan_add dev br vid 2 self
+ adf_bridge_vlan_add dev br vid 3 self
+
+ for dev in "$swp1" "$swp2"; do
+ adf_ip_link_set_master "$dev" br
+ adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged
+ adf_bridge_vlan_add dev "$dev" vid 2
+ adf_bridge_vlan_add dev "$dev" vid 3
+ done
+}
+
+adf_bridge_create()
+{
+ local mac
+
+ adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@"
+ mac=$(mac_get br)
+ adf_bridge_configure
+ adf_ip_link_set_addr br "$mac"
+}
+
+check_fdb_local_vlan_0_support()
+{
+ if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \
+ fdb_local_vlan_0 1 &>/dev/null; then
+ return 0
+ fi
+
+ log_test_skip "FDB sharing" \
+ "iproute 2 or the kernel do not support fdb_local_vlan_0"
+}
+
+check_mac_presence()
+{
+ local should_fail=$1; shift
+ local dev=$1; shift
+ local vlan=$1; shift
+ local mac
+
+ mac=$(mac_get "$dev")
+
+ if ((vlan == 0)); then
+ vlan=null
+ fi
+
+ bridge -j fdb show dev "$dev" |
+ jq -e --arg mac "$mac" --argjson vlan "$vlan" \
+ '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null
+ check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists"
+}
+
+do_sharing_test()
+{
+ local should_fail=$1; shift
+ local what=$1; shift
+ local dev
+
+ RET=0
+
+ for dev in "$swp1" "$swp2" br; do
+ check_mac_presence 0 "$dev" 0
+ check_mac_presence "$should_fail" "$dev" 1
+ check_mac_presence "$should_fail" "$dev" 2
+ check_mac_presence "$should_fail" "$dev" 3
+ done
+
+ log_test "$what"
+}
+
+do_end_to_end_test()
+{
+ local mac=$1; shift
+ local what=$1; shift
+ local probe_dev=${1-$h3}; shift
+ local expect=${1-10}; shift
+
+ local t0
+ local t1
+ local dd
+
+ RET=0
+
+ # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing
+ # context, that will cause an FDB miss on VLAN 1 and prompt a second
+ # lookup in VLAN 0.
+
+ t0=$(tc_rule_stats_get "$probe_dev" 104 ingress)
+
+ $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q
+ sleep 1
+
+ t1=$(tc_rule_stats_get "$probe_dev" 104 ingress)
+ dd=$((t1 - t0))
+
+ ((dd == expect))
+ check_err $? "Expected $expect packets on $probe_dev got $dd"
+
+ log_test "$what"
+}
+
+do_tests()
+{
+ local should_fail=$1; shift
+ local what=$1; shift
+ local swp1_mac
+ local br_mac
+
+ swp1_mac=$(mac_get "$swp1")
+ br_mac=$(mac_get br)
+
+ do_sharing_test "$should_fail" "$what"
+ do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC"
+ do_end_to_end_test "$br_mac" "$what: end to end, br MAC"
+}
+
+bridge_standard()
+{
+ local vlan_filtering=$1; shift
+
+ if ((vlan_filtering)); then
+ echo 802.1q
+ else
+ echo 802.1d
+ fi
+}
+
+nonexistent_fdb_test()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ # We expect flooding, so $h2 should get the traffic.
+ do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2"
+}
+
+misleading_fdb_test()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ defer_scope_push
+ # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge
+ # shouldn't pick this up even with fdb_local_vlan_0 enabled, so
+ # the traffic should be flooded. This all holds on
+ # vlan_filtering bridge, on non-vlan_filtering one the FDB entry
+ # is expected to be found as usual, no flooding takes place.
+ #
+ # Adding only on VLAN 0 is a bit tricky, because bridge is
+ # trying to be nice and interprets the request as if the FDB
+ # should be added on each VLAN.
+
+ bridge fdb add "$mMAC" dev "$swp1" master
+ bridge fdb del "$mMAC" dev "$swp1" vlan 1 master
+ bridge fdb del "$mMAC" dev "$swp1" vlan 2 master
+ bridge fdb del "$mMAC" dev "$swp1" vlan 3 master
+
+ local expect=$((vlan_filtering ? 10 : 0))
+ do_end_to_end_test "$mMAC" \
+ "$standard: Lookup of non-local MAC on VLAN 0" \
+ "$h2" "$expect"
+ defer_scope_pop
+}
+
+change_mac()
+{
+ local dev=$1; shift
+ local mac=$1; shift
+ local cur_mac
+
+ cur_mac=$(mac_get "$dev")
+
+ log_info "Change $dev MAC $cur_mac -> $mac"
+ adf_ip_link_set_addr "$dev" "$mac"
+ defer log_info "Change $dev MAC back"
+}
+
+do_test_no_sharing()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ adf_bridge_create vlan_filtering "$vlan_filtering"
+ setup_wait
+
+ do_tests 0 "$standard, no FDB sharing"
+
+ change_mac "$swp1" "$pMAC"
+ change_mac br "$bMAC"
+
+ do_tests 0 "$standard, no FDB sharing after MAC change"
+
+ in_defer_scope check_fdb_local_vlan_0_support || return
+
+ log_info "Set fdb_local_vlan_0=1"
+ ip link set dev br type bridge fdb_local_vlan_0 1
+
+ do_tests 1 "$standard, fdb sharing after toggle"
+}
+
+do_test_sharing()
+{
+ local vlan_filtering=$1; shift
+ local standard
+
+ standard=$(bridge_standard "$vlan_filtering")
+
+ in_defer_scope check_fdb_local_vlan_0_support || return
+
+ adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1
+ setup_wait
+
+ do_tests 1 "$standard, FDB sharing"
+
+ nonexistent_fdb_test "$vlan_filtering"
+ misleading_fdb_test "$vlan_filtering"
+
+ change_mac "$swp1" "$pMAC"
+ change_mac br "$bMAC"
+
+ do_tests 1 "$standard, FDB sharing after MAC change"
+
+ log_info "Set fdb_local_vlan_0=0"
+ ip link set dev br type bridge fdb_local_vlan_0 0
+
+ do_tests 0 "$standard, No FDB sharing after toggle"
+}
+
+test_d_no_sharing()
+{
+ do_test_no_sharing 0
+}
+
+test_d_sharing()
+{
+ do_test_sharing 0
+}
+
+test_q_no_sharing()
+{
+ do_test_no_sharing 1
+}
+
+test_q_sharing()
+{
+ do_test_sharing 1
+}
+
+adf_addr_set_bridge_create()
+{
+ adf_ip_link_add br up type bridge vlan_filtering 0
+ adf_ip_link_set_addr br "$(mac_get br)"
+ adf_bridge_configure
+}
+
+test_addr_set()
+{
+ adf_addr_set_bridge_create
+ setup_wait
+
+ do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
- v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
- v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
- v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+ v2reportleave_test
+ v3include_test
+ v3inc_allow_test
+ v3inc_is_include_test
+ v3inc_is_exclude_test
+ v3inc_to_exclude_test
+ v3exc_allow_test
+ v3exc_is_include_test
+ v3exc_is_exclude_test
+ v3exc_to_exclude_test
+ v3inc_block_test
+ v3exc_block_test
+ v3exc_timeout_test
+ v3star_ex_auto_add_test
+ v2per_vlan_snooping_port_stp_test
+ v2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
v3cleanup $swp2 $TEST_GROUP
}
+v2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_igmp_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No IGMP queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+ v2per_vlan_snooping_stp_test 1
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+ v2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index d9d587454d20..e86d77946585 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
cfg_test
fwd_test
ctrl_test
+ disable_test
"
NUM_NETIFS=4
@@ -64,7 +65,10 @@ h2_destroy()
switch_create()
{
- ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ local vlan_filtering=$1; shift
+
+ ip link add name br0 type bridge \
+ vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \
mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
bridge vlan add vid 10 dev br0 self
bridge vlan add vid 20 dev br0 self
@@ -118,7 +122,7 @@ setup_prepare()
h1_create
h2_create
- switch_create
+ switch_create 1
}
cleanup()
@@ -149,7 +153,7 @@ cfg_test_host_common()
check_err $? "Failed to add $name host entry"
bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
- check_fail $? "Managed to replace $name host entry"
+ check_err $? "Failed to replace $name host entry"
bridge mdb del dev br0 port br0 grp $grp $state vid 10
bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
@@ -1357,6 +1361,98 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
+check_group()
+{
+ local group=$1; shift
+ local vid=$1; shift
+ local should_fail=$1; shift
+ local when=$1; shift
+ local -a vidkws
+
+ if ((vid)); then
+ vidkws=(vid "$vid")
+ fi
+
+ bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null |
+ grep -q "port $swp1"
+ check_err_fail "$should_fail" $? "$group seen $when snooping disable:"
+}
+
+__disable_test()
+{
+ local vid=$1; shift
+ local what=$1; shift
+ local -a vidkws
+
+ if ((vid)); then
+ vidkws=(vid "$vid")
+ fi
+
+ RET=0
+
+ bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \
+ "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1
+ bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp ff0e::3 \
+ "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2
+ bridge mdb add dev br0 port "$swp1" grp ff0e::4 \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \
+ "${vidkws[@]}" filter_mode include source_list 192.0.2.1
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \
+ "${vidkws[@]}" filter_mode exclude
+
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \
+ "${vidkws[@]}" filter_mode include source_list 192.0.2.2
+ bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \
+ "${vidkws[@]}" filter_mode exclude
+
+ check_group ff0e::1 "$vid" 0 "before"
+ check_group ff0e::2 "$vid" 0 "before"
+ check_group ff0e::3 "$vid" 0 "before"
+ check_group ff0e::4 "$vid" 0 "before"
+
+ check_group 239.1.1.1 "$vid" 0 "before"
+ check_group 239.1.1.2 "$vid" 0 "before"
+ check_group 239.1.1.3 "$vid" 0 "before"
+ check_group 239.1.1.4 "$vid" 0 "before"
+
+ ip link set dev br0 type bridge mcast_snooping 0
+
+ check_group ff0e::1 "$vid" 0 "after"
+ check_group ff0e::2 "$vid" 0 "after"
+ check_group ff0e::3 "$vid" 1 "after"
+ check_group ff0e::4 "$vid" 1 "after"
+
+ check_group 239.1.1.1 "$vid" 0 "after"
+ check_group 239.1.1.2 "$vid" 0 "after"
+ check_group 239.1.1.3 "$vid" 1 "after"
+ check_group 239.1.1.4 "$vid" 1 "after"
+
+ log_test "$what: Flush after disable"
+
+ ip link set dev br0 type bridge mcast_snooping 1
+ sleep 10
+}
+
+disable_test()
+{
+ __disable_test 10 802.1q
+
+ switch_destroy
+ switch_create 0
+ setup_wait
+
+ __disable_test 0 802.1d
+
+ switch_destroy
+ switch_create 1
+ setup_wait
+}
+
if ! bridge mdb help 2>&1 | grep -q "flush"; then
echo "SKIP: iproute2 too old, missing bridge mdb flush support"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
- mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
- mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
- mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+ mldv2include_test
+ mldv2inc_allow_test
+ mldv2inc_is_include_test
+ mldv2inc_is_exclude_test
+ mldv2inc_to_exclude_test
+ mldv2exc_allow_test
+ mldv2exc_is_include_test
+ mldv2exc_is_exclude_test
+ mldv2exc_to_exclude_test
+ mldv2inc_block_test
+ mldv2exc_block_test
+ mldv2exc_timeout_test
+ mldv2star_ex_auto_add_test
+ mldv2per_vlan_snooping_port_stp_test
+ mldv2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
mldv2cleanup $swp2
}
+mldv2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No MLD queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2 \
+ mcast_mld_version 1
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 1
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 90f8a244ea90..e59fba366a0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -194,6 +194,100 @@ other_tpid()
tc qdisc del dev $h2 clsact
}
+8021p_do()
+{
+ local should_fail=$1; shift
+ local mac=de:ad:be:ef:13:37
+
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err_fail $should_fail $? "802.1p-tagged reception"
+
+ tc filter del dev $h2 ingress pref 1
+}
+
+8021p()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with the default_pvid, 1, packets tagged with VID 0 are
+ # accepted.
+ 8021p_do 0
+
+ # Test that packets tagged with VID 0 are still accepted after changing
+ # the default_pvid.
+ ip link set br0 type bridge vlan_default_pvid 10
+ 8021p_do 0
+
+ log_test "Reception of 802.1p-tagged traffic"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
+send_untagged_and_8021p()
+{
+ ping_do $h1 192.0.2.2
+ check_fail $?
+
+ 8021p_do 1
+}
+
+drop_untagged()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with no PVID, untagged and 802.1p-tagged traffic is
+ # dropped.
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ # First we reconfigure the default_pvid, 1, as a non-PVID VLAN.
+ bridge vlan add dev $swp1 vid 1 untagged
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Next we try to delete VID 1 altogether
+ bridge vlan del dev $swp1 vid 1
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Set up the bridge without a default_pvid, then check that the 8021q
+ # module, when the bridge port goes down and then up again, does not
+ # accidentally re-enable untagged packet reception.
+ ip link set br0 type bridge vlan_default_pvid 0
+ ip link set $swp1 down
+ ip link set $swp1 up
+ setup_wait
+ send_untagged_and_8021p
+
+ # Remove swp1 as a bridge port and let it rejoin the bridge while it
+ # has no default_pvid.
+ ip link set $swp1 nomaster
+ ip link set $swp1 master br0
+ send_untagged_and_8021p
+
+ # Restore settings
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1c8a26046589..2b5700b61ffa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change"
NUM_NETIFS=4
source lib.sh
@@ -77,12 +77,16 @@ cleanup()
ping_ipv4()
{
- ping_test $h1 192.0.2.2
+ local msg=$1
+
+ ping_test $h1 192.0.2.2 "$msg"
}
ping_ipv6()
{
- ping6_test $h1 2001:db8:1::2
+ local msg=$1
+
+ ping6_test $h1 2001:db8:1::2 "$msg"
}
learning()
@@ -95,6 +99,21 @@ flooding()
flood_test $swp2 $h1 $h2
}
+pvid_change()
+{
+ # Test that the changing of the VLAN-aware PVID does not affect
+ # VLAN-unaware forwarding
+ bridge vlan add vid 3 dev $swp1 pvid untagged
+
+ ping_ipv4 " with bridge port $swp1 PVID changed"
+ ping_ipv6 " with bridge port $swp1 PVID changed"
+
+ bridge vlan del vid 3 dev $swp1
+
+ ping_ipv4 " with bridge port $swp1 PVID deleted"
+ ping_ipv6 " with bridge port $swp1 PVID deleted"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..ce64518aaa11 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,23 +1,23 @@
+CONFIG_BPF_SYSCALL=y
CONFIG_BRIDGE=m
-CONFIG_VLAN_8021Q=m
+CONFIG_BRIDGE_IGMP_SNOOPING=y
CONFIG_BRIDGE_VLAN_FILTERING=y
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_NET_VRF=m
-CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
CONFIG_DUMMY=m
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
CONFIG_IPV6=y
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_PIMSM_V2=y
-CONFIG_IP_MROUTE=y
-CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
-CONFIG_IP_PIMSM_V1=y
-CONFIG_IP_PIMSM_V2=y
CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_MPLS=m
CONFIG_NET_ACT_PEDIT=m
@@ -26,29 +26,30 @@ CONFIG_NET_ACT_SAMPLE=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_VLAN=m
+CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
-CONFIG_NET_CLS_BASIC=m
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_META=m
+CONFIG_NETFILTER=y
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
-CONFIG_NET_ACT_GACT=m
CONFIG_NET_SCH_PRIO=m
CONFIG_NET_SCH_RED=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_TC_SKB_EXT=y
CONFIG_NET_TEAM=y
CONFIG_NET_TEAM_MODE_LOADBALANCE=y
-CONFIG_NETFILTER=y
+CONFIG_NET_VRF=m
CONFIG_NF_CONNTRACK=m
CONFIG_NF_FLOW_TABLE=m
CONFIG_NF_TABLES=m
CONFIG_VETH=m
-CONFIG_NAMESPACES=y
-CONFIG_NET_NS=y
+CONFIG_VLAN_8021Q=m
CONFIG_VXLAN=m
CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 7d531f7091e6..5dbfab0e23e3 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -226,7 +226,7 @@ send_flowlabel()
# Generate 16384 echo requests, each with a random flow label.
ip vrf exec v$h1 sh -c \
"for _ in {1..16384}; do \
- $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \
done"
}
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index dda11a4a9450..b4f17a5bbc61 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -321,7 +321,7 @@ send_flowlabel()
# Generate 16384 echo requests, each with a random flow label.
ip vrf exec v$h1 sh -c \
"for _ in {1..16384}; do \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \
done"
}
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
index 49fa94b53a1c..25036e38043c 100755
--- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -95,7 +95,7 @@ ipv6_in_too_big_err()
# Send too big packets
ip vrf exec $vrf_name \
- $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
test "$((t1 - t0))" -ne 0
@@ -131,7 +131,7 @@ ipv6_in_addr_err()
# Disable forwarding temporary while sending the packet
sysctl -qw net.ipv6.conf.all.forwarding=0
ip vrf exec $vrf_name \
- $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
sysctl -qw net.ipv6.conf.all.forwarding=1
local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
@@ -150,7 +150,7 @@ ipv6_in_discard()
# Add a policy to discard
ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
ip vrf exec $vrf_name \
- $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null
ip xfrm policy del dst 2001:1:2::2/128 dir fwd
local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index e28b4a079e52..b24acfa52a3a 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -323,7 +323,7 @@ send_flowlabel()
# Generate 16384 echo requests, each with a random flow label.
ip vrf exec v$h1 sh -c \
"for _ in {1..16384}; do \
- $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1; \
+ $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \
done"
}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7337f398f9cc..a9034f0bb58b 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -37,6 +37,7 @@ declare -A NETIFS=(
: "${TEAMD:=teamd}"
: "${MCD:=smcrouted}"
: "${MC_CLI:=smcroutectl}"
+: "${MCD_TABLE_NAME:=selftests}"
# Constants for netdevice bring-up:
# Default time in seconds to wait for an interface to come up before giving up
@@ -68,6 +69,7 @@ declare -A NETIFS=(
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
+: "${REQUIRE_TEAMD:=no}"
# Whether to override MAC addresses on interfaces participating in the test.
: "${STABLE_MAC_ADDRS:=no}"
@@ -140,6 +142,20 @@ check_tc_version()
fi
}
+check_tc_erspan_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing erspan support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress pref 1 handle 1 flower \
+ erspan_opts 1:0:0:0 &> /dev/null
+}
+
# Old versions of tc don't understand "mpls_uc"
check_tc_mpls_support()
{
@@ -290,16 +306,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
-require_command()
-{
- local cmd=$1; shift
-
- if [[ ! -x "$(command -v "$cmd")" ]]; then
- echo "SKIP: $cmd not installed"
- exit $ksft_skip
- fi
-}
-
# IPv6 support was added in v3.0
check_mtools_version()
{
@@ -321,6 +327,9 @@ fi
if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
+if [[ "$REQUIRE_TEAMD" = "yes" ]]; then
+ require_command $TEAMD
+fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
# https://github.com/troglobit/mtools
require_command msend
@@ -531,9 +540,9 @@ setup_wait_dev_with_timeout()
return 1
}
-setup_wait()
+setup_wait_n()
{
- local num_netifs=${1:-$NUM_NETIFS}
+ local num_netifs=$1; shift
local i
for ((i = 1; i <= num_netifs; ++i)); do
@@ -544,6 +553,11 @@ setup_wait()
sleep $WAIT_TIME
}
+setup_wait()
+{
+ setup_wait_n "$NUM_NETIFS"
+}
+
wait_for_dev()
{
local dev=$1; shift
@@ -557,30 +571,6 @@ wait_for_dev()
fi
}
-cmd_jq()
-{
- local cmd=$1
- local jq_exp=$2
- local jq_opts=$3
- local ret
- local output
-
- output="$($cmd)"
- # it the command fails, return error right away
- ret=$?
- if [[ $ret -ne 0 ]]; then
- return $ret
- fi
- output=$(echo $output | jq -r $jq_opts "$jq_exp")
- ret=$?
- if [[ $ret -ne 0 ]]; then
- return $ret
- fi
- echo $output
- # return success only in case of non-empty output
- [ ! -z "$output" ]
-}
-
pre_cleanup()
{
if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
@@ -609,6 +599,12 @@ vrf_cleanup()
ip -4 rule del pref 32765
}
+adf_vrf_prepare()
+{
+ vrf_prepare
+ defer vrf_cleanup
+}
+
__last_tb_id=0
declare -A __TB_IDS
@@ -721,6 +717,12 @@ simple_if_fini()
vrf_destroy $vrf_name
}
+adf_simple_if_init()
+{
+ simple_if_init "$@"
+ defer simple_if_fini "$@"
+}
+
tunnel_create()
{
local name=$1; shift
@@ -932,13 +934,6 @@ packets_rate()
echo $(((t1 - t0) / interval))
}
-mac_get()
-{
- local if_name=$1
-
- ip -j link show dev $if_name | jq -r '.[]["address"]'
-}
-
ether_addr_to_u64()
{
local addr="$1"
@@ -1028,6 +1023,12 @@ forwarding_restore()
sysctl_restore net.ipv4.conf.all.forwarding
}
+adf_forwarding_enable()
+{
+ forwarding_enable
+ defer forwarding_restore
+}
+
declare -A MTU_ORIG
mtu_set()
{
@@ -1284,8 +1285,8 @@ ping_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING $args $dip -c $PING_COUNT -i 0.1 \
- -w $PING_TIMEOUT &> /dev/null
+ $PING $args -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT $dip &> /dev/null
}
ping_test()
@@ -1315,8 +1316,8 @@ ping6_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING6 $args $dip -c $PING_COUNT -i 0.1 \
- -w $PING_TIMEOUT &> /dev/null
+ $PING6 $args -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT $dip &> /dev/null
}
ping6_test()
@@ -1770,6 +1771,51 @@ mc_send()
msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
}
+adf_mcd_start()
+{
+ local ifs=("$@")
+
+ local table_name="$MCD_TABLE_NAME"
+ local smcroutedir
+ local pid
+ local if
+ local i
+
+ check_command "$MCD" || return 1
+ check_command "$MC_CLI" || return 1
+
+ smcroutedir=$(mktemp -d)
+ defer rm -rf "$smcroutedir"
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ echo "phyint ${NETIFS[p$i]} enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ for if in "${ifs[@]}"; do
+ if ! ip_link_has_flag "$if" MULTICAST; then
+ ip link set dev "$if" multicast on
+ defer ip link set dev "$if" multicast off
+ fi
+
+ echo "phyint $if enable" >> \
+ "$smcroutedir/$table_name.conf"
+ done
+
+ "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \
+ -P "$smcroutedir/$table_name.pid"
+ busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid"
+ pid=$(cat "$smcroutedir/$table_name.pid")
+ defer kill_process "$pid"
+}
+
+mc_cli()
+{
+ local table_name="$MCD_TABLE_NAME"
+
+ "$MC_CLI" -I "$table_name" "$@"
+}
+
start_ip_monitor()
{
local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
index ff2accccaf4d..b4eda6c6199e 100755
--- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -30,6 +30,11 @@ tfail()
do_test "tfail" false
}
+tfail2()
+{
+ do_test "tfail2" false
+}
+
txfail()
{
FAIL_TO_XFAIL=yes do_test "txfail" false
@@ -132,6 +137,8 @@ test_ret()
ret_subtest $ksft_fail "tfail" txfail tfail
ret_subtest $ksft_xfail "txfail" txfail txfail
+
+ ret_subtest $ksft_fail "tfail2" tfail2 tfail
}
exit_status_tests_run()
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index c35548767756..892895659c7e 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -7,7 +7,6 @@ ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \
NUM_NETIFS=2
PING_COUNT=1
REQUIRE_MTOOLS=yes
-REQUIRE_MZ=no
source lib.sh
@@ -177,6 +176,8 @@ run_test()
local rcv_dmac=$(mac_get $rcv_if_name)
local should_receive
+ setup_wait
+
tcpdump_start $rcv_if_name
mc_route_prepare $send_if_name
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index fe4d7c906a70..8d4ae6c952a1 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -49,6 +49,7 @@ ALL_TESTS="
test_mirror_gretap_second
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
@@ -237,7 +238,7 @@ test_lag_slave()
ip neigh flush dev br1
setup_wait_dev $up_dev
setup_wait_dev $host_dev
- $ARPING -I br1 192.0.2.130 -qfc 1
+ $ARPING -I br1 -qfc 1 192.0.2.130
sleep 2
mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
index 1261e6f46e34..ff7049582d35 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -53,6 +53,7 @@ ALL_TESTS="
test_mirror_gretap_second
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index 1b902cc579f6..a21c771908b3 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -196,7 +196,7 @@ test_span_gre_forbidden_egress()
bridge vlan add dev $swp3 vid 555
# Re-prime FDB
- $ARPING -I br1.555 192.0.2.130 -fqc 1
+ $ARPING -I br1.555 -fqc 1 192.0.2.130
sleep 1
quick_test_span_gre_dir $tundev
@@ -290,7 +290,7 @@ test_span_gre_fdb_roaming()
bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null
# Re-prime FDB
- $ARPING -I br1.555 192.0.2.130 -fqc 1
+ $ARPING -I br1.555 -fqc 1 192.0.2.130
sleep 1
quick_test_span_gre_dir $tundev
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index b98ea9449b8b..dfb6646cb97b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -18,6 +18,8 @@
# | 2001:db8:1::1/64 2001:db8:2::1/64 |
# | |
# +-----------------------------------------------------------------+
+#
+#shellcheck disable=SC2034 # SC doesn't see our uses of global variables
ALL_TESTS="
ping_ipv4
@@ -27,6 +29,7 @@ ALL_TESTS="
ipv4_sip_equal_dip
ipv6_sip_equal_dip
ipv4_dip_link_local
+ ipv4_sip_link_local
"
NUM_NETIFS=4
@@ -330,6 +333,32 @@ ipv4_dip_link_local()
tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
}
+ipv4_sip_link_local()
+{
+ local sip=169.254.1.1
+
+ RET=0
+
+ # Disable rpfilter to prevent packets to be dropped because of it.
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf."$rp1".rp_filter 0
+
+ tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \
+ flower src_ip "$sip" action pass
+
+ $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \
+ -A "$sip" -B 198.51.100.2 -q
+
+ tc_check_packets "dev $rp2 egress" 101 5
+ check_err $? "Packets were dropped"
+
+ log_test "IPv4 source IP is link-local"
+
+ tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower
+ sysctl_restore net.ipv4.conf."$rp1".rp_filter
+ sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
index e064b946e821..16583a470ec3 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
@@ -109,6 +109,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=8
source lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
index f05ffe213c46..2a4cd1af1b85 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
@@ -76,6 +76,7 @@
ping_ipv4
ping_ipv6
"}
+REQUIRE_TEAMD="yes"
NUM_NETIFS=8
: ${lib_dir:=.}
source $lib_dir/lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 5a58b1ec8aef..83e52abdbc2e 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -33,10 +33,6 @@ NUM_NETIFS=6
source lib.sh
source tc_common.sh
-require_command $MCD
-require_command $MC_CLI
-table_name=selftests
-
h1_create()
{
simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64
@@ -149,25 +145,6 @@ router_destroy()
ip link set dev $rp1 down
}
-start_mcd()
-{
- SMCROUTEDIR="$(mktemp -d)"
-
- for ((i = 1; i <= $NUM_NETIFS; ++i)); do
- echo "phyint ${NETIFS[p$i]} enable" >> \
- $SMCROUTEDIR/$table_name.conf
- done
-
- $MCD -N -I $table_name -f $SMCROUTEDIR/$table_name.conf \
- -P $SMCROUTEDIR/$table_name.pid
-}
-
-kill_mcd()
-{
- pkill $MCD
- rm -rf $SMCROUTEDIR
-}
-
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -179,7 +156,7 @@ setup_prepare()
rp3=${NETIFS[p5]}
h3=${NETIFS[p6]}
- start_mcd
+ adf_mcd_start || exit "$EXIT_STATUS"
vrf_prepare
@@ -206,7 +183,7 @@ cleanup()
vrf_cleanup
- kill_mcd
+ defer_scopes_cleanup
}
create_mcast_sg()
@@ -214,9 +191,9 @@ create_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs
+ mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
delete_mcast_sg()
@@ -224,9 +201,9 @@ delete_mcast_sg()
local if_name=$1; shift
local s_addr=$1; shift
local mcast=$1; shift
- local dest_ifs=${@}
+ local dest_ifs=("${@}")
- $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs
+ mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}"
}
mcast_v4()
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 1f6f53e284b5..6269d5e23487 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
ets_test_strict
ets_test_mixed
ets_test_dwrr
+ ets_test_plug
classifier_mode
ets_test_strict
ets_test_mixed
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
index 8f9922c695b0..0453210271dc 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh
@@ -165,8 +165,7 @@ h1_create()
{
local i;
- simple_if_init $h1
- defer simple_if_fini $h1
+ adf_simple_if_init $h1
mtu_set $h1 9900
defer mtu_restore $h1
@@ -182,8 +181,7 @@ h2_create()
{
local i
- simple_if_init $h2
- defer simple_if_fini $h2
+ adf_simple_if_init $h2
mtu_set $h2 9900
defer mtu_restore $h2
@@ -251,8 +249,7 @@ setup_prepare()
put=$swp2
hut=$h2
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 08240d3e3c87..79d837a2868a 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -224,3 +224,11 @@ ets_test_dwrr()
ets_set_dwrr_two_bands
xfail_on_slow ets_dwrr_test_01
}
+
+ets_test_plug()
+{
+ ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514"
+ tc qdisc add dev $put handle 20: parent 10:4 plug
+ start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1"
+ ets_qdisc_setup $put 2
+}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index af166662b78a..f2a3d9254642 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -52,8 +52,7 @@ PKTSZ=1400
h1_create()
{
- simple_if_init $h1 192.0.2.1/28
- defer simple_if_fini $h1 192.0.2.1/28
+ adf_simple_if_init $h1 192.0.2.1/28
mtu_set $h1 10000
defer mtu_restore $h1
@@ -65,8 +64,7 @@ h1_create()
h2_create()
{
- simple_if_init $h2 192.0.2.2/28
- defer simple_if_fini $h2 192.0.2.2/28
+ adf_simple_if_init $h2 192.0.2.2/28
mtu_set $h2 10000
defer mtu_restore $h2
@@ -74,8 +72,7 @@ h2_create()
h3_create()
{
- simple_if_init $h3 192.0.2.3/28
- defer simple_if_fini $h3 192.0.2.3/28
+ adf_simple_if_init $h3 192.0.2.3/28
mtu_set $h3 10000
defer mtu_restore $h3
@@ -125,8 +122,7 @@ setup_prepare()
h3_mac=$(mac_get $h3)
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index ec309a5086bc..070c17faa9e4 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -59,8 +59,7 @@ host_create()
local dev=$1; shift
local host=$1; shift
- simple_if_init $dev
- defer simple_if_fini $dev
+ adf_simple_if_init $dev
mtu_set $dev 10000
defer mtu_restore $dev
@@ -149,8 +148,7 @@ setup_prepare()
h2_mac=$(mac_get $h2)
- vrf_prepare
- defer vrf_cleanup
+ adf_vrf_prepare
h1_create
h2_create
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index b1daad19b01e..b58909a93112 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_ip_tos_test match_indev_test match_ip_ttl_test
match_mpls_label_test \
match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
- match_mpls_lse_test"
+ match_mpls_lse_test match_erspan_opts_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -676,6 +676,56 @@ match_mpls_lse_test()
log_test "mpls lse match ($tcflags)"
}
+match_erspan_opts_test()
+{
+ RET=0
+
+ check_tc_erspan_support $h2 || return 0
+
+ # h1 erspan setup
+ tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \
+ tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II
+ tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \
+ tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \
+ # ERSPAN Type III
+ ip link set dev erspan1 master v$h1
+ ip link set dev erspan2 master v$h1
+ # h2 erspan setup
+ ip link add ep-ex type erspan ttl 64 external # To collect tunnel info
+ ip link set ep-ex up
+ ip link set dev ep-ex master v$h2
+ tc qdisc add dev ep-ex clsact
+
+ # ERSPAN Type II [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 101 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1001 erspan_opts 1:6789:0:0 \
+ action drop
+ # ERSPAN Type III [decap direction]
+ tc filter add dev ep-ex ingress protocol ip handle 102 flower \
+ $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \
+ enc_key_id 1002 erspan_opts 2:0:1:63 action drop
+
+ ep1mac=$(mac_get erspan1)
+ $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 101 1
+ check_err $? "ERSPAN Type II"
+
+ ep2mac=$(mac_get erspan2)
+ $MZ erspan2 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q
+ tc_check_packets "dev ep-ex ingress" 102 1
+ check_err $? "ERSPAN Type III"
+
+ # h2 erspan cleanup
+ tc qdisc del dev ep-ex clsact
+ tunnel_destroy ep-ex
+ # h1 erspan cleanup
+ tunnel_destroy erspan2 # ERSPAN Type III
+ tunnel_destroy erspan1 # ERSPAN Type II
+
+ log_test "erspan_opts match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
index 3885a2a91f7d..baed5e380dae 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -20,6 +20,7 @@ ALL_TESTS="
test_port_range_ipv4_tcp
test_port_range_ipv6_udp
test_port_range_ipv6_tcp
+ test_port_range_ipv4_udp_drop
"
NUM_NETIFS=4
@@ -194,6 +195,51 @@ test_port_range_ipv6_tcp()
__test_port_range $proto $ip_proto $sip $dip $mode "$name"
}
+test_port_range_ipv4_udp_drop()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP Drop"
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=2000
+ local sport_max=3000
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport=5000
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport \
+ action drop
+
+ # Test ports outside range - should pass
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport"
+
+ # Test ports inside range - should be dropped
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport"
+
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Filter did not drop the expected number of packets"
+
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh
new file mode 100755
index 000000000000..8992aeabfe0b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS=" \
+ test_clock_jump_backward \
+ test_taprio_after_ptp \
+ test_max_sdu \
+ test_clock_jump_backward_forward \
+"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+source tsn_lib.sh
+
+require_command python3
+
+# The test assumes the usual topology from the README, where h1 is connected to
+# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge.
+# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2.
+# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to
+# swp1 (and both to CLOCK_REALTIME).
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Tunables
+NUM_PKTS=100
+STREAM_VID=10
+STREAM_PRIO_1=6
+STREAM_PRIO_2=5
+STREAM_PRIO_3=4
+# PTP uses TC 0
+ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2))
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((CYCLE_TIME_NS / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((CYCLE_TIME_NS / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((GATE_DURATION_NS / 2))
+
+path_delay=
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set $swp1 up
+ ip link set $swp2 up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+ ip link set br0 up
+
+ bridge vlan add dev $swp2 vid $STREAM_VID
+ bridge vlan add dev $swp1 vid $STREAM_VID
+ bridge fdb add dev $swp2 \
+ $h2_mac_addr vlan $STREAM_VID static master
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+ptp_setup()
+{
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start $UDS_ADDRESS_SWP1
+ ptp4l_start $h1 true $UDS_ADDRESS_H1
+ ptp4l_start $swp1 false $UDS_ADDRESS_SWP1
+}
+
+ptp_cleanup()
+{
+ ptp4l_stop $swp1
+ ptp4l_stop $h1
+ phc2sys_stop
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev $if_name egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev $if_name egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+ tc qdisc del dev $if_name root
+}
+
+taprio_replace()
+{
+ local if_name="$1"; shift
+ local extra_args="$1"; shift
+
+ # STREAM_PRIO_1 always has an open gate.
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time)
+ # STREAM_PRIO_3 always has a closed gate.
+ tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \
+ sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \
+ base-time 0 flags 0x2 $extra_args
+ taprio_wait_for_admin $if_name
+}
+
+taprio_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name root
+}
+
+probe_path_delay()
+{
+ local isochron_dat="$(mktemp)"
+ local received
+
+ log_info "Probing path delay"
+
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \
+ "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \
+ "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ if [ "$received" != "$NUM_PKTS" ]; then
+ echo "Cannot establish basic data path between $h1 and $h2"
+ exit $ksft_fail
+ fi
+
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(np.max(w)))
+ EOF
+ path_delay=$(python3 ./isochron_postprocess.py)
+
+ log_info "Path delay from $h1 to $h2 estimated at $path_delay ns"
+
+ if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then
+ echo "Path delay larger than gate duration, aborting"
+ exit $ksft_fail
+ fi
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup $h1
+
+ # Temporarily set up PTP just to probe the end-to-end path delay.
+ ptp_setup
+ probe_path_delay
+ ptp_cleanup
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ isochron_recv_stop
+ txtime_cleanup $h1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+run_test()
+{
+ local base_time=$1; shift
+ local stream_prio=$1; shift
+ local expected_delay=$1; shift
+ local should_fail=$1; shift
+ local test_name=$1; shift
+ local isochron_dat="$(mktemp)"
+ local received
+ local median_delay
+
+ RET=0
+
+ # Set the shift time equal to the cycle time, which effectively
+ # cancels the default advance time. Packets won't be sent early in
+ # software, which ensures that they won't prematurely enter through
+ # the open gate in __test_out_of_band(). Also, the gate is open for
+ # long enough that this won't cause a problem in __test_in_band().
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \
+ "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \
+ "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ [ "$received" = "$NUM_PKTS" ]
+ check_err_fail $should_fail $? "Reception of $NUM_PKTS packets"
+
+ if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(int(np.median(w))))
+ EOF
+ median_delay=$(python3 ./isochron_postprocess.py)
+
+ # If the condition below is true, packets were delayed by a closed gate
+ [ "$median_delay" -gt $((path_delay + expected_delay)) ]
+ check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay"
+
+ # If the condition below is true, packets were sent expecting them to
+ # hit a closed gate in the switch, but were not delayed
+ [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ]
+ check_fail $? "Median delay $median_delay is less than expected delay $expected_delay"
+ fi
+
+ log_test "$test_name"
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+__test_always_open()
+{
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open"
+}
+
+__test_always_closed()
+{
+ run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed"
+}
+
+__test_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate"
+}
+
+__test_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 $STREAM_PRIO_2 \
+ $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \
+ "Out of band with gate"
+}
+
+run_subtests()
+{
+ __test_always_open
+ __test_always_closed
+ __test_in_band
+ __test_out_of_band
+}
+
+test_taprio_after_ptp()
+{
+ log_info "Setting up taprio after PTP"
+ ptp_setup
+ taprio_replace $swp2
+ run_subtests
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+__test_under_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU"
+}
+
+__test_over_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU"
+}
+
+test_max_sdu()
+{
+ ptp_setup
+ __test_under_max_sdu
+ __test_over_max_sdu
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+# Perform a clock jump in the past without synchronization running, so that the
+# time base remains where it was set by phc_ctl.
+test_clock_jump_backward()
+{
+ # This is a more complex schedule specifically crafted in a way that
+ # has been problematic on NXP LS1028A. Not much to test with it other
+ # than the fact that it passes traffic.
+ tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \
+ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \
+ sched-entry S 20 300000 sched-entry S 48 200000 \
+ sched-entry S 20 300000 sched-entry S 83 200000 \
+ sched-entry S 40 300000 sched-entry S 00 200000 flags 2
+
+ log_info "Forcing a backward clock jump"
+ phc_ctl $swp1 set 0
+
+ ping_test $h1 192.0.2.2
+ taprio_cleanup $swp2
+}
+
+# Test that taprio tolerates clock jumps.
+# Since ptp4l and phc2sys are running, it is expected for the time to
+# eventually recover (through yet another clock jump). Isochron waits
+# until that is the case.
+test_clock_jump_backward_forward()
+{
+ log_info "Forcing a backward and a forward clock jump"
+ taprio_replace $swp2
+ phc_ctl $swp1 set 0
+ ptp_setup
+ ping_test $h1 192.0.2.2
+ run_subtests
+ ptp_cleanup
+ taprio_cleanup $swp2
+}
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_test_skip "Could not test offloaded functionality"
+ exit $EXIT_STATUS
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index b91bcd8008a9..08c044ff6689 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright 2021-2022 NXP
+tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts
+
REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
@@ -18,6 +20,7 @@ fi
if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
require_command phc2sys
require_command ptp4l
+ require_command phc_ctl
fi
phc2sys_start()
@@ -182,6 +185,7 @@ isochron_do()
local base_time=$1; shift
local cycle_time=$1; shift
local shift_time=$1; shift
+ local window_size=$1; shift
local num_pkts=$1; shift
local vid=$1; shift
local priority=$1; shift
@@ -212,6 +216,10 @@ isochron_do()
extra_args="${extra_args} --shift-time=${shift_time}"
fi
+ if ! [ -z "${window_size}" ]; then
+ extra_args="${extra_args} --window-size=${window_size}"
+ fi
+
if [ "${use_l2}" = "true" ]; then
extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
receiver_extra_args="--l2 --etype=0xdead"
@@ -247,3 +255,21 @@ isochron_do()
cpufreq_restore ${ISOCHRON_CPU}
}
+
+isochron_report_num_received()
+{
+ local isochron_dat=$1; shift
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l
+}
+
+taprio_wait_for_admin()
+{
+ local if_name="$1"; shift
+
+ "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name"
+}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 3f9d50f1ef9e..b43816dd998c 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -428,6 +428,14 @@ __test_flood()
test_flood()
{
__test_flood de:ad:be:ef:13:37 192.0.2.100 "flood"
+
+ # Add an entry with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry"
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self
}
vxlan_fdb_add_del()
@@ -740,6 +748,8 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
+ # The entry should age out when it only forwards traffic
+ $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q &
sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index fb9a34cb50c6..afc65647f673 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -539,6 +539,21 @@ test_flood()
10 10 0 10 0
__test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \
10 0 10 0 10
+
+ # Add entries with arbitrary destination IP. Verify that packets are
+ # not duplicated (this can happen if hardware floods the packets, and
+ # then traps them due to misconfiguration, so software data path repeats
+ # flooding and resends packets).
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+
+ __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \
+ "flood vlan 10, unresolved FDB entry" 10 10 0 10 0
+ __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \
+ "flood vlan 20, unresolved FDB entry" 10 0 10 0 10
+
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self
}
vxlan_fdb_add_del()
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
new file mode 100755
index 000000000000..6a570d256e07
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh
@@ -0,0 +1,766 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------------------------+
+# | + $h1.10 + $h1.20 |
+# | | 192.0.2.1/28 | 2001:db8:1::1/64 |
+# | \________ ________/ |
+# | \ / |
+# | + $h1 H1 (vrf) |
+# +-----------|-----------------------------+
+# |
+# +-----------|----------------------------------------------------------------+
+# | +---------|--------------------------------------+ SWITCH (main vrf) |
+# | | + $swp1 BR1 (802.1q) | |
+# | | vid 10 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) |
+# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 |
+# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 |
+# | | id 1000 id 2000 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +------------------------------------------------+ |
+# | |
+# | + $swp2 $swp3 + |
+# | | 192.0.2.33/28 192.0.2.65/28 | |
+# | | 2001:db8:2::1/64 2001:db8:3::1/64 | |
+# | | | |
+# +---|--------------------------------------------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | | H2 (vrf) | | H3 (vrf) | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | |
+# | | | | | | | |
+# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | |
+# | +-|----------------------------+ | | +-----------------------------|-+ |
+# | | | | | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | |
+# +---|--------------------------------+ +--------------------------------|---+
+# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + |
+# | 192.0.2.34/28 | | 192.0.2.66/28 |
+# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 |
+# | | | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# | | BR1 (802.1q) | | | | BR1 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | |
+# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 20 | | | | | vid 10 20 | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | | | | |
+# | +--|-----------------------------+ | | +--|-----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +--------------------------------+ | | +--------------------------------+ |
+# +------------------------------------+ +------------------------------------+
+#
+#shellcheck disable=SC2317 # SC doesn't see our uses of functions.
+
+: "${VXPORT:=4789}"
+export VXPORT
+
+: "${GROUP4:=233.252.0.1}"
+export GROUP4
+
+: "${GROUP6:=ff0e::1:2:3}"
+export GROUP6
+
+: "${IPMR:=lo10}"
+
+ALL_TESTS="
+ ipv4_nomcroute
+ ipv4_mcroute
+ ipv4_mcroute_changelink
+ ipv4_mcroute_starg
+ ipv4_mcroute_noroute
+ ipv4_mcroute_fdb
+ ipv4_mcroute_fdb_oif0
+ ipv4_mcroute_fdb_oif0_sep
+
+ ipv6_nomcroute
+ ipv6_mcroute
+ ipv6_mcroute_changelink
+ ipv6_mcroute_starg
+ ipv6_mcroute_noroute
+ ipv6_mcroute_fdb
+ ipv6_mcroute_fdb_oif0
+
+ ipv4_nomcroute_rx
+ ipv4_mcroute_rx
+ ipv4_mcroute_starg_rx
+ ipv4_mcroute_fdb_oif0_sep_rx
+ ipv4_mcroute_fdb_sep_rx
+
+ ipv6_nomcroute_rx
+ ipv6_mcroute_rx
+ ipv6_mcroute_starg_rx
+ ipv6_mcroute_fdb_sep_rx
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ adf_simple_if_init "$h1"
+
+ adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10
+ adf_ip_link_set_up "$h1.10"
+ adf_ip_addr_add "$h1.10" 192.0.2.1/28
+
+ adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20
+ adf_ip_link_set_up "$h1.20"
+ adf_ip_addr_add "$h1.20" 2001:db8:1::1/64
+}
+
+install_capture()
+{
+ local dev=$1; shift
+
+ tc qdisc add dev "$dev" clsact
+ defer tc qdisc del dev "$dev" clsact
+
+ tc filter add dev "$dev" ingress proto ip pref 104 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ip pref 104
+
+ tc filter add dev "$dev" ingress proto ipv6 pref 106 \
+ flower skip_hw ip_proto udp dst_port "$VXPORT" \
+ action pass
+ defer tc filter del dev "$dev" ingress proto ipv6 pref 106
+}
+
+h2_create()
+{
+ # $h2
+ adf_ip_link_set_up "$h2"
+
+ # H2
+ vrf_create "v$h2"
+ defer vrf_destroy "v$h2"
+
+ adf_ip_link_set_up "v$h2"
+
+ # br2
+ adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0
+ adf_ip_link_set_master br2 "v$h2"
+ adf_ip_link_set_up br2
+
+ # $h2
+ adf_ip_link_set_master "$h2" br2
+ install_capture "$h2"
+
+ # v1$h2
+ adf_ip_link_set_up "v1$h2"
+ adf_ip_link_set_master "v1$h2" br2
+}
+
+h3_create()
+{
+ # $h3
+ adf_ip_link_set_up "$h3"
+
+ # H3
+ vrf_create "v$h3"
+ defer vrf_destroy "v$h3"
+
+ adf_ip_link_set_up "v$h3"
+
+ # br3
+ adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0
+ adf_ip_link_set_master br3 "v$h3"
+ adf_ip_link_set_up br3
+
+ # $h3
+ adf_ip_link_set_master "$h3" br3
+ install_capture "$h3"
+
+ # v1$h3
+ adf_ip_link_set_up "v1$h3"
+ adf_ip_link_set_master "v1$h3" br3
+}
+
+switch_create()
+{
+ local swp1_mac
+
+ # br1
+ swp1_mac=$(mac_get "$swp1")
+ adf_ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ adf_ip_link_set_addr br1 "$swp1_mac"
+ adf_ip_link_set_up br1
+
+ # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on
+ # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test.
+ adf_ip_link_add "X$IPMR" up type dummy
+
+ # IPMR
+ adf_ip_link_add "$IPMR" up type dummy
+ adf_ip_addr_add "$IPMR" 192.0.2.100/28
+ adf_ip_addr_add "$IPMR" 2001:db8:4::1/64
+
+ # $swp1
+ adf_ip_link_set_up "$swp1"
+ adf_ip_link_set_master "$swp1" br1
+ adf_bridge_vlan_add vid 10 dev "$swp1"
+ adf_bridge_vlan_add vid 20 dev "$swp1"
+
+ # $swp2
+ adf_ip_link_set_up "$swp2"
+ adf_ip_addr_add "$swp2" 192.0.2.33/28
+ adf_ip_addr_add "$swp2" 2001:db8:2::1/64
+
+ # $swp3
+ adf_ip_link_set_up "$swp3"
+ adf_ip_addr_add "$swp3" 192.0.2.65/28
+ adf_ip_addr_add "$swp3" 2001:db8:3::1/64
+}
+
+vx_create()
+{
+ local name=$1; shift
+ local vid=$1; shift
+
+ adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 16 \
+ "$@"
+ adf_ip_link_set_master "$name" br1
+ adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged
+}
+export -f vx_create
+
+vx_wait()
+{
+ # Wait for all the ARP, IGMP etc. noise to settle down so that the
+ # tunnel is clear for measurements.
+ sleep 10
+}
+
+vx10_create()
+{
+ vx_create vx10 10 id 1000 "$@"
+}
+export -f vx10_create
+
+vx20_create()
+{
+ vx_create vx20 20 id 2000 "$@"
+}
+export -f vx20_create
+
+vx10_create_wait()
+{
+ vx10_create "$@"
+ vx_wait
+}
+
+vx20_create_wait()
+{
+ vx20_create "$@"
+ vx_wait
+}
+
+ns_init_common()
+{
+ local ns=$1; shift
+ local if_in=$1; shift
+ local ipv4_in=$1; shift
+ local ipv6_in=$1; shift
+ local ipv4_host=$1; shift
+ local ipv6_host=$1; shift
+
+ # v2$h2 / v2$h3
+ adf_ip_link_set_up "$if_in"
+ adf_ip_addr_add "$if_in" "$ipv4_in"
+ adf_ip_addr_add "$if_in" "$ipv6_in"
+
+ # br1
+ adf_ip_link_add br1 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ adf_ip_link_set_up br1
+
+ # vx10, vx20
+ vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in"
+ vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in"
+
+ # w1
+ adf_ip_link_add w1 type veth peer name w2
+ adf_ip_link_set_master w1 br1
+ adf_ip_link_set_up w1
+ adf_bridge_vlan_add vid 10 dev w1
+ adf_bridge_vlan_add vid 20 dev w1
+
+ # w2
+ adf_simple_if_init w2
+
+ # w2.10
+ adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10
+ adf_ip_link_set_up w2.10
+ adf_ip_addr_add w2.10 "$ipv4_host"
+
+ # w2.20
+ adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20
+ adf_ip_link_set_up w2.20
+ adf_ip_addr_add w2.20 "$ipv6_host"
+}
+export -f ns_init_common
+
+ns2_create()
+{
+ # NS2
+ ip netns add ns2
+ defer ip netns del ns2
+
+ # v2$h2
+ ip link set dev "v2$h2" netns ns2
+ defer ip -n ns2 link set dev "v2$h2" netns 1
+
+ in_ns ns2 \
+ ns_init_common ns2 "v2$h2" \
+ 192.0.2.34/28 2001:db8:2::2/64 \
+ 192.0.2.3/28 2001:db8:1::3/64
+}
+
+ns3_create()
+{
+ # NS3
+ ip netns add ns3
+ defer ip netns del ns3
+
+ # v2$h3
+ ip link set dev "v2$h3" netns ns3
+ defer ip -n ns3 link set dev "v2$h3" netns 1
+
+ ip -n ns3 link set dev "v2$h3" up
+
+ in_ns ns3 \
+ ns_init_common ns3 "v2$h3" \
+ 192.0.2.66/28 2001:db8:3::2/64 \
+ 192.0.2.4/28 2001:db8:1::4/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ adf_vrf_prepare
+ adf_forwarding_enable
+
+ adf_ip_link_add "v1$h2" type veth peer name "v2$h2"
+ adf_ip_link_add "v1$h3" type veth peer name "v2$h3"
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+ ns2_create
+ ns3_create
+}
+
+adf_install_broken_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 192.0.2.100 "$GROUP4" "$swp1" "$swp3"
+
+ mc_cli add "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+ defer mc_cli remove "$swp2" 2001:db8:4::1 "$GROUP6" "$swp1" "$swp3"
+}
+
+adf_install_rx()
+{
+ mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
+ defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" lo10
+
+ mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp2" :: "$GROUP6" lo10
+
+ mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
+ defer mc_cli remove "$swp3" :: "$GROUP6" lo10
+}
+
+adf_install_sg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 192.0.2.33 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_sg_sep()
+{
+ adf_mcd_start lo || exit "$EXIT_STATUS"
+
+ mc_cli add lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove lo 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove lo 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+}
+
+adf_install_sg_sep_rx()
+{
+ local lo=$1; shift
+
+ adf_mcd_start "$IPMR" "$lo" || exit "$EXIT_STATUS"
+
+ mc_cli add "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 192.0.2.120 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+adf_install_starg()
+{
+ adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"
+
+ mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3"
+
+ mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+ defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3"
+
+ adf_install_rx
+}
+
+do_packets_v4()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q
+}
+
+do_packets_v6()
+{
+ local mac
+
+ mac=$(mac_get "$h2")
+ "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \
+ -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q
+}
+
+do_test()
+{
+ local ipv=$1; shift
+ local expect_h2=$1; shift
+ local expect_h3=$1; shift
+ local what=$1; shift
+
+ local pref=$((100 + ipv))
+ local t0_h2
+ local t0_h3
+ local t1_h2
+ local t1_h3
+ local d_h2
+ local d_h3
+
+ RET=0
+
+ t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ "do_packets_v$ipv"
+ sleep 1
+
+ t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress)
+ t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress)
+
+ d_h2=$((t1_h2 - t0_h2))
+ d_h3=$((t1_h3 - t0_h3))
+
+ ((d_h2 == expect_h2))
+ check_err $? "Expected $expect_h2 packets on H2, got $d_h2"
+
+ ((d_h3 == expect_h3))
+ check_err $? "Expected $expect_h3 packets on H3, got $d_h3"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping_do "$h1.10" 192.0.2.3
+ check_err $? "H2 should respond"
+
+ ping_do "$h1.10" 192.0.2.4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv6_do_test_rx()
+{
+ local h3_should_fail=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ ping6_do "$h1.20" 2001:db8:1::3
+ check_err $? "H2 should respond"
+
+ ping6_do "$h1.20" 2001:db8:1::4
+ check_err_fail "$h3_should_fail" $? "H3 responds"
+
+ log_test "VXLAN MC flood $what"
+}
+
+ipv4_nomcroute()
+{
+ # Install a misleading (S,G) rule to attempt to trick the system into
+ # pushing the packets elsewhere.
+ adf_install_broken_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ do_test 4 10 0 "IPv4 nomcroute"
+}
+
+ipv6_nomcroute()
+{
+ # Like for IPv4, install a misleading (S,G).
+ adf_install_broken_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ do_test 6 10 0 "IPv6 nomcroute"
+}
+
+ipv4_nomcroute_rx()
+{
+ vx10_create local 192.0.2.100 group "$GROUP4" dev "$swp2"
+ ipv4_do_test_rx 1 "IPv4 nomcroute ping"
+}
+
+ipv6_nomcroute_rx()
+{
+ vx20_create local 2001:db8:4::1 group "$GROUP6" dev "$swp2"
+ ipv6_do_test_rx 1 "IPv6 nomcroute ping"
+}
+
+ipv4_mcroute()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute"
+}
+
+ipv6_mcroute()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute"
+}
+
+ipv4_mcroute_rx()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute ping"
+}
+
+ipv6_mcroute_rx()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute ping"
+}
+
+ipv4_mcroute_changelink()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR"
+ ip link set dev vx10 type vxlan mcroute
+ sleep 1
+ do_test 4 10 10 "IPv4 mcroute changelink"
+}
+
+ipv6_mcroute_changelink()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ip link set dev vx20 type vxlan mcroute
+ sleep 1
+ do_test 6 10 10 "IPv6 mcroute changelink"
+}
+
+ipv4_mcroute_starg()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 10 10 "IPv4 mcroute (*,G)"
+}
+
+ipv6_mcroute_starg()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 10 10 "IPv6 mcroute (*,G)"
+}
+
+ipv4_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ ipv4_do_test_rx 0 "IPv4 mcroute (*,G) ping"
+}
+
+ipv6_mcroute_starg_rx()
+{
+ adf_install_starg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ ipv6_do_test_rx 0 "IPv6 mcroute (*,G) ping"
+}
+
+ipv4_mcroute_noroute()
+{
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ do_test 4 0 0 "IPv4 mcroute, no route"
+}
+
+ipv6_mcroute_noroute()
+{
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ do_test 6 0 0 "IPv6 mcroute, no route"
+}
+
+ipv4_mcroute_fdb()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 dev "$IPMR" mcroute
+ bridge fdb add dev vx10 \
+ 00:00:00:00:00:00 self static dst "$GROUP4" via "$IPMR"
+ do_test 4 10 10 "IPv4 mcroute FDB"
+}
+
+ipv6_mcroute_fdb()
+{
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 dev "$IPMR" mcroute
+ bridge -6 fdb add dev vx20 \
+ 00:00:00:00:00:00 self static dst "$GROUP6" via "$IPMR"
+ do_test 6 10 10 "IPv6 mcroute FDB"
+}
+
+# Use FDB to configure VXLAN in a way where oif=0 for purposes of FIB lookup.
+ipv4_mcroute_fdb_oif0()
+{
+ adf_install_sg
+ vx10_create_wait local 192.0.2.100 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute oif=0"
+}
+
+ipv6_mcroute_fdb_oif0()
+{
+ # The IPv6 tunnel lookup does not fall back to selection by source
+ # address. Instead it just does a FIB match, and that would find one of
+ # the several ff00::/8 multicast routes -- each device has one. In order
+ # to reliably force the $IPMR device, add a /128 route for the
+ # destination group address.
+ ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR"
+ defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR"
+
+ adf_install_sg
+ vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6"
+ do_test 6 10 10 "IPv6 mcroute oif=0"
+}
+
+# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR.
+# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT.
+ipv4_mcroute_fdb_oif0_sep()
+{
+ adf_install_sg_sep
+
+ adf_ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0"
+}
+
+ipv4_mcroute_fdb_oif0_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ adf_ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4"
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping"
+}
+
+ipv4_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx lo
+
+ adf_ip_addr_add lo 192.0.2.120/28
+ vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute
+ bridge fdb del dev vx10 00:00:00:00:00:00
+ bridge fdb add \
+ dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo
+ ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping"
+}
+
+ipv6_mcroute_fdb_sep_rx()
+{
+ adf_install_sg_sep_rx "X$IPMR"
+
+ adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64
+ vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute
+ bridge -6 fdb del dev vx20 00:00:00:00:00:00
+ bridge -6 fdb add dev vx20 00:00:00:00:00:00 \
+ self static dst "$GROUP6" via "X$IPMR"
+ ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
new file mode 100755
index 000000000000..709845123727
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|---------------+
+# |
+# +----|--------------------------------+
+# | SW | |
+# | +--|------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 192.0.2.17 | |
+# | | id 1000 dstport $VXPORT | |
+# | +---------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +--|----------------------------------+
+# |
+# +--|----------------------------------+
+# | | |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | |
+# | VRP2 (vrf) |
+# +-------------------------------------+
+
+: ${VXPORT:=4789}
+: ${ALL_TESTS:="
+ default_test
+ plain_test
+ reserved_0_test
+ reserved_10_test
+ reserved_31_test
+ reserved_56_test
+ reserved_63_test
+ "}
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ adf_simple_if_init $h1 192.0.2.1/28
+
+ tc qdisc add dev $h1 clsact
+ defer tc qdisc del dev $h1 clsact
+
+ tc filter add dev $h1 ingress pref 77 \
+ prot ip flower skip_hw ip_proto icmp action drop
+ defer tc filter del dev $h1 ingress pref 77
+}
+
+switch_create()
+{
+ adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ adf_ip_link_set_addr br1 $(mac_get $swp1)
+ adf_ip_link_set_up br1
+
+ adf_ip_link_set_up $rp1
+ adf_ip_addr_add $rp1 192.0.2.17/28
+ adf_ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
+
+ adf_ip_link_set_master $swp1 br1
+ adf_ip_link_set_up $swp1
+}
+
+vrp2_create()
+{
+ adf_simple_if_init $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ adf_vrf_prepare
+ adf_forwarding_enable
+
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+vxlan_header_bytes()
+{
+ local vni=$1; shift
+ local -a extra_bits=("$@")
+ local -a bits
+ local i
+
+ for ((i=0; i < 64; i++)); do
+ bits[i]=0
+ done
+
+ # Bit 4 is the I flag and is always on.
+ bits[4]=1
+
+ for i in ${extra_bits[@]}; do
+ bits[i]=1
+ done
+
+ # Bits 32..55 carry the VNI
+ local mask=0x800000
+ for ((i=0; i < 24; i++)); do
+ bits[$((i + 32))]=$(((vni & mask) != 0))
+ ((mask >>= 1))
+ done
+
+ local bytes
+ for ((i=0; i < 8; i++)); do
+ local byte=0
+ local j
+ for ((j=0; j < 8; j++)); do
+ local bit=${bits[8 * i + j]}
+ ((byte += bit << (7 - j)))
+ done
+ bytes+=$(printf %02x $byte):
+ done
+
+ echo ${bytes%:}
+}
+
+neg_bytes()
+{
+ local bytes=$1; shift
+
+ local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8
+ [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:)
+ local out
+ local i
+
+ for ((i=0; i < ${#bytes}; i++)); do
+ local c=${bytes:$i:1}
+ out+=${neg[$c]}
+ done
+ echo $out
+}
+
+vxlan_ping_do()
+{
+ local count=$1; shift
+ local dev=$1; shift
+ local next_hop_mac=$1; shift
+ local dest_ip=$1; shift
+ local dest_mac=$1; shift
+ local vni=$1; shift
+ local reserved_bits=$1; shift
+
+ local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits)
+
+ $MZ $dev -c $count -d 100msec -q \
+ -b $next_hop_mac -B $dest_ip \
+ -t udp sp=23456,dp=$VXPORT,p=$(:
+ )"$vxlan_header:"$( : VXLAN
+ )"$dest_mac:"$( : ETH daddr
+ )"00:11:22:33:44:55:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"99:83:"$( : IP identification
+ )"40:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"00:00:"$( : IP header csum
+ )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr
+ )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr
+ )"08:"$( : ICMP type
+ )"00:"$( : ICMP code
+ )"8b:f2:"$( : ICMP csum
+ )"1f:6a:"$( : ICMP request identifier
+ )"00:01:"$( : ICMP request seq. number
+ )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload
+ )"6d:74:0b:00:00:00:00:00:"$( :
+ )"10:11:12:13:14:15:16:17:"$( :
+ )"18:19:1a:1b:1c:1d:1e:1f:"$( :
+ )"20:21:22:23:24:25:26:27:"$( :
+ )"28:29:2a:2b:2c:2d:2e:2f:"$( :
+ )"30:31:32:33:34:35:36:37"
+}
+
+vxlan_device_add()
+{
+ adf_ip_link_add vx1 up type vxlan id 1000 \
+ local 192.0.2.17 dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 100 "$@"
+ adf_ip_link_set_master vx1 br1
+}
+
+vxlan_all_reserved_bits()
+{
+ local i
+
+ for ((i=0; i < 64; i++)); do
+ if ((i == 4 || i >= 32 && i < 56)); then
+ continue
+ fi
+ echo $i
+ done
+}
+
+vxlan_ping_vanilla()
+{
+ vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000
+}
+
+vxlan_ping_reserved()
+{
+ for bit in $(vxlan_all_reserved_bits); do
+ vxlan_ping_do 1 $rp2 $(mac_get $rp1) \
+ 192.0.2.17 $(mac_get $h1) 1000 "$bit"
+ ((n++))
+ done
+}
+
+vxlan_ping_test()
+{
+ local what=$1; shift
+ local get_stat=$1; shift
+ local expect=$1; shift
+
+ RET=0
+
+ local t0=$($get_stat)
+
+ "$@"
+ check_err $? "Failure when running $@"
+
+ local t1=$($get_stat)
+ local delta=$((t1 - t0))
+
+ ((expect == delta))
+ check_err $? "Expected to capture $expect packets, got $delta."
+
+ log_test "$what"
+}
+
+__default_test_do()
+{
+ local n_allowed_bits=$1; shift
+ local what=$1; shift
+
+ vxlan_ping_test "$what: clean packets" \
+ "tc_rule_stats_get $h1 77 ingress" \
+ 10 vxlan_ping_vanilla
+
+ local t0=$(link_stats_get vx1 rx errors)
+ vxlan_ping_test "$what: mangled packets" \
+ "tc_rule_stats_get $h1 77 ingress" \
+ $n_allowed_bits vxlan_ping_reserved
+ local t1=$(link_stats_get vx1 rx errors)
+
+ RET=0
+ local expect=$((39 - n_allowed_bits))
+ local delta=$((t1 - t0))
+ ((expect == delta))
+ check_err $? "Expected $expect error packets, got $delta."
+ log_test "$what: drops reported"
+}
+
+default_test_do()
+{
+ vxlan_device_add
+ __default_test_do 0 "Default"
+}
+
+default_test()
+{
+ in_defer_scope \
+ default_test_do
+}
+
+plain_test_do()
+{
+ vxlan_device_add reserved_bits 0xf7ffffff000000ff
+ __default_test_do 0 "reserved_bits 0xf7ffffff000000ff"
+}
+
+plain_test()
+{
+ in_defer_scope \
+ plain_test_do
+}
+
+reserved_test()
+{
+ local bit=$1; shift
+
+ local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit)
+ local reserved_bytes=$(neg_bytes $allowed_bytes)
+ local reserved_bits=${reserved_bytes//:/}
+
+ vxlan_device_add reserved_bits 0x$reserved_bits
+ __default_test_do 1 "reserved_bits 0x$reserved_bits"
+}
+
+reserved_0_test()
+{
+ in_defer_scope \
+ reserved_test 0
+}
+
+reserved_10_test()
+{
+ in_defer_scope \
+ reserved_test 10
+}
+
+reserved_31_test()
+{
+ in_defer_scope \
+ reserved_test 31
+}
+
+reserved_56_test()
+{
+ in_defer_scope \
+ reserved_test 56
+}
+
+reserved_63_test()
+{
+ in_defer_scope \
+ reserved_test 63
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
new file mode 100755
index 000000000000..48eb999a3120
--- /dev/null
+++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./lib.sh
+
+PAUSE_ON_FAIL="no"
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ cleanup_all_ns
+
+ exit "${EXIT_STATUS}"
+}
+
+# Add fake IPv4 and IPv6 networks on the loopback device, to be used as
+# underlay by future GRE devices.
+#
+setup_basenet()
+{
+ ip -netns "${NS0}" link set dev lo up
+ ip -netns "${NS0}" address add dev lo 192.0.2.10/24
+ ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad
+}
+
+# Check the IPv6 configuration of a network device.
+#
+# We currently check the generation of the link-local IPv6 address and the
+# creation of the ff00::/8 multicast route.
+#
+# Parameters:
+#
+# * $1: The network device to test
+# * $2: An extra regular expression that should be matched (to verify the
+# presence of extra attributes)
+# * $3: The expected return code from grep (to allow checking the absence of
+# a link-local address)
+# * $4: The user visible name for the scenario being tested
+#
+check_ipv6_device_config()
+{
+ local DEV="$1"
+ local EXTRA_MATCH="$2"
+ local XRET="$3"
+ local MSG="$4"
+
+ RET=0
+ set +e
+ ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}"
+ check_err_fail "${XRET}" $? "IPv6 link-local address generation"
+
+ ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}"
+ check_err_fail 0 $? "IPv6 multicast route creation"
+
+ log_test "${MSG}"
+ set -e
+}
+
+# Create a GRE device and verify that it gets an IPv6 link-local address as
+# expected.
+#
+# Parameters:
+#
+# * $1: The device type (gre, ip6gre, gretap or ip6gretap)
+# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
+# * $4: The IPv6 interface identifier generation mode to use for the GRE
+# device (eui64, none, stable-privacy or random).
+#
+test_gre_device()
+{
+ local GRE_TYPE="$1"
+ local LOCAL_IP="$2"
+ local REMOTE_IP="$3"
+ local MODE="$4"
+ local ADDR_GEN_MODE
+ local MATCH_REGEXP
+ local MSG
+
+ ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"
+
+ case "${MODE}" in
+ "eui64")
+ ADDR_GEN_MODE=0
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ "none")
+ ADDR_GEN_MODE=1
+ MATCH_REGEXP=""
+ MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=1 # No link-local address should be generated
+ ;;
+ "stable-privacy")
+ ADDR_GEN_MODE=2
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ # Initialise stable_secret (required for stable-privacy mode)
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
+ ;;
+ "random")
+ ADDR_GEN_MODE=3
+ MATCH_REGEXP="stable-privacy"
+ MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
+ XRET=0
+ ;;
+ esac
+
+ # Check the IPv6 device configuration when it goes up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ ip -netns "${NS0}" link set dev gretest up
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"
+
+ # Now disable link-local address generation
+ ip -netns "${NS0}" link set dev gretest down
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
+ ip -netns "${NS0}" link set dev gretest up
+
+ # Check the IPv6 device configuration when link-local address
+ # generation is re-enabled while the device is already up
+ ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
+ check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"
+
+ ip -netns "${NS0}" link del dev gretest
+}
+
+test_gre4()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "gre" "gretap"; do
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}"
+ done
+ done
+}
+
+test_gre6()
+{
+ local GRE_TYPE
+ local MODE
+
+ for GRE_TYPE in "ip6gre" "ip6gretap"; do
+ printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n"
+
+ for MODE in "eui64" "none" "stable-privacy" "random"; do
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}"
+ test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}"
+ done
+ done
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+setup_ns NS0
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_basenet
+
+test_gre4
+test_gre6
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
deleted file mode 100644
index b2184847e388..000000000000
--- a/tools/testing/selftests/net/gro.c
+++ /dev/null
@@ -1,1328 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This testsuite provides conformance testing for GRO coalescing.
- *
- * Test cases:
- * 1.data
- * Data packets of the same size and same header setup with correct
- * sequence numbers coalesce. The one exception being the last data
- * packet coalesced: it can be smaller than the rest and coalesced
- * as long as it is in the same flow.
- * 2.ack
- * Pure ACK does not coalesce.
- * 3.flags
- * Specific test cases: no packets with PSH, SYN, URG, RST set will
- * be coalesced.
- * 4.tcp
- * Packets with incorrect checksum, non-consecutive seqno and
- * different TCP header options shouldn't coalesce. Nit: given that
- * some extension headers have paddings, such as timestamp, headers
- * that are padding differently would not be coalesced.
- * 5.ip:
- * Packets with different (ECN, TTL, TOS) header, ip options or
- * ip fragments (ipv6) shouldn't coalesce.
- * 6.large:
- * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
- *
- * MSS is defined as 4096 - header because if it is too small
- * (i.e. 1500 MTU - header), it will result in many packets,
- * increasing the "large" test case's flakiness. This is because
- * due to time sensitivity in the coalescing window, the receiver
- * may not coalesce all of the packets.
- *
- * Note the timing issue applies to all of the test cases, so some
- * flakiness is to be expected.
- *
- */
-
-#define _GNU_SOURCE
-
-#include <arpa/inet.h>
-#include <errno.h>
-#include <error.h>
-#include <getopt.h>
-#include <linux/filter.h>
-#include <linux/if_packet.h>
-#include <linux/ipv6.h>
-#include <net/ethernet.h>
-#include <net/if.h>
-#include <netinet/in.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdio.h>
-#include <stdarg.h>
-#include <string.h>
-#include <unistd.h>
-
-#include "../kselftest.h"
-
-#define DPORT 8000
-#define SPORT 1500
-#define PAYLOAD_LEN 100
-#define NUM_PACKETS 4
-#define START_SEQ 100
-#define START_ACK 100
-#define ETH_P_NONE 0
-#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
-#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
-#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
-#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
-#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
-#define MIN_EXTHDR_SIZE 8
-#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
-#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
-
-#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
-
-static const char *addr6_src = "fdaa::2";
-static const char *addr6_dst = "fdaa::1";
-static const char *addr4_src = "192.168.1.200";
-static const char *addr4_dst = "192.168.1.100";
-static int proto = -1;
-static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
-static char *testname = "data";
-static char *ifname = "eth0";
-static char *smac = "aa:00:00:00:00:02";
-static char *dmac = "aa:00:00:00:00:01";
-static bool verbose;
-static bool tx_socket = true;
-static int tcp_offset = -1;
-static int total_hdr_len = -1;
-static int ethhdr_proto = -1;
-static const int num_flush_id_cases = 6;
-
-static void vlog(const char *fmt, ...)
-{
- va_list args;
-
- if (verbose) {
- va_start(args, fmt);
- vfprintf(stderr, fmt, args);
- va_end(args);
- }
-}
-
-static void setup_sock_filter(int fd)
-{
- const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
- const int ethproto_off = offsetof(struct ethhdr, h_proto);
- int optlen = 0;
- int ipproto_off, opt_ipproto_off;
- int next_off;
-
- if (proto == PF_INET)
- next_off = offsetof(struct iphdr, protocol);
- else
- next_off = offsetof(struct ipv6hdr, nexthdr);
- ipproto_off = ETH_HLEN + next_off;
-
- /* Overridden later if exthdrs are used: */
- opt_ipproto_off = ipproto_off;
-
- if (strcmp(testname, "ip") == 0) {
- if (proto == PF_INET)
- optlen = sizeof(struct ip_timestamp);
- else {
- BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
- BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
-
- /* same size for HBH and Fragment extension header types */
- optlen = MIN_EXTHDR_SIZE;
- opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
- + offsetof(struct ip6_ext, ip6e_nxt);
- }
- }
-
- /* this filter validates the following:
- * - packet is IPv4/IPv6 according to the running test.
- * - packet is TCP. Also handles the case of one extension header and then TCP.
- * - checks the packet tcp dport equals to DPORT. Also handles the case of one
- * extension header and then TCP.
- */
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
- BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
- BPF_STMT(BPF_RET + BPF_K, 0),
- };
-
- struct sock_fprog bpf = {
- .len = ARRAY_SIZE(filter),
- .filter = filter,
- };
-
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
- error(1, errno, "error setting filter");
-}
-
-static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
-{
- uint16_t *words = data;
- int i;
-
- for (i = 0; i < len / 2; i++)
- sum += words[i];
- if (len & 1)
- sum += ((char *)data)[len - 1];
- return sum;
-}
-
-static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
-{
- sum = checksum_nofold(data, len, sum);
- while (sum > 0xFFFF)
- sum = (sum & 0xFFFF) + (sum >> 16);
- return ~sum;
-}
-
-static uint16_t tcp_checksum(void *buf, int payload_len)
-{
- struct pseudo_header6 {
- struct in6_addr saddr;
- struct in6_addr daddr;
- uint16_t protocol;
- uint16_t payload_len;
- } ph6;
- struct pseudo_header4 {
- struct in_addr saddr;
- struct in_addr daddr;
- uint16_t protocol;
- uint16_t payload_len;
- } ph4;
- uint32_t sum = 0;
-
- if (proto == PF_INET6) {
- if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
- error(1, errno, "inet_pton6 source ip pseudo");
- if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
- error(1, errno, "inet_pton6 dest ip pseudo");
- ph6.protocol = htons(IPPROTO_TCP);
- ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
-
- sum = checksum_nofold(&ph6, sizeof(ph6), 0);
- } else if (proto == PF_INET) {
- if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
- error(1, errno, "inet_pton source ip pseudo");
- if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
- error(1, errno, "inet_pton dest ip pseudo");
- ph4.protocol = htons(IPPROTO_TCP);
- ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
-
- sum = checksum_nofold(&ph4, sizeof(ph4), 0);
- }
-
- return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
-}
-
-static void read_MAC(uint8_t *mac_addr, char *mac)
-{
- if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
- &mac_addr[0], &mac_addr[1], &mac_addr[2],
- &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
- error(1, 0, "sscanf");
-}
-
-static void fill_datalinklayer(void *buf)
-{
- struct ethhdr *eth = buf;
-
- memcpy(eth->h_dest, dst_mac, ETH_ALEN);
- memcpy(eth->h_source, src_mac, ETH_ALEN);
- eth->h_proto = ethhdr_proto;
-}
-
-static void fill_networklayer(void *buf, int payload_len)
-{
- struct ipv6hdr *ip6h = buf;
- struct iphdr *iph = buf;
-
- if (proto == PF_INET6) {
- memset(ip6h, 0, sizeof(*ip6h));
-
- ip6h->version = 6;
- ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
- ip6h->nexthdr = IPPROTO_TCP;
- ip6h->hop_limit = 8;
- if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
- error(1, errno, "inet_pton source ip6");
- if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
- error(1, errno, "inet_pton dest ip6");
- } else if (proto == PF_INET) {
- memset(iph, 0, sizeof(*iph));
-
- iph->version = 4;
- iph->ihl = 5;
- iph->ttl = 8;
- iph->protocol = IPPROTO_TCP;
- iph->tot_len = htons(sizeof(struct tcphdr) +
- payload_len + sizeof(struct iphdr));
- iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
- if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
- error(1, errno, "inet_pton source ip");
- if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
- error(1, errno, "inet_pton dest ip");
- iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
- }
-}
-
-static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
- int payload_len, int fin)
-{
- struct tcphdr *tcph = buf;
-
- memset(tcph, 0, sizeof(*tcph));
-
- tcph->source = htons(SPORT);
- tcph->dest = htons(DPORT);
- tcph->seq = ntohl(START_SEQ + seq_offset);
- tcph->ack_seq = ntohl(START_ACK + ack_offset);
- tcph->ack = 1;
- tcph->fin = fin;
- tcph->doff = 5;
- tcph->window = htons(TCP_MAXWIN);
- tcph->urg_ptr = 0;
- tcph->check = tcp_checksum(tcph, payload_len);
-}
-
-static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
-{
- int ret = -1;
-
- ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
- if (ret == -1)
- error(1, errno, "sendto failure");
- if (ret != len)
- error(1, errno, "sendto wrong length");
-}
-
-static void create_packet(void *buf, int seq_offset, int ack_offset,
- int payload_len, int fin)
-{
- memset(buf, 0, total_hdr_len);
- memset(buf + total_hdr_len, 'a', payload_len);
- fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
- payload_len, fin);
- fill_networklayer(buf + ETH_HLEN, payload_len);
- fill_datalinklayer(buf);
-}
-
-/* send one extra flag, not first and not last pkt */
-static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
- int rst, int urg)
-{
- static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- int payload_len, pkt_size, flag, i;
- struct tcphdr *tcph;
-
- payload_len = PAYLOAD_LEN * psh;
- pkt_size = total_hdr_len + payload_len;
- flag = NUM_PACKETS / 2;
-
- create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
-
- tcph = (struct tcphdr *)(flag_buf + tcp_offset);
- tcph->psh = psh;
- tcph->syn = syn;
- tcph->rst = rst;
- tcph->urg = urg;
- tcph->check = 0;
- tcph->check = tcp_checksum(tcph, payload_len);
-
- for (i = 0; i < NUM_PACKETS + 1; i++) {
- if (i == flag) {
- write_packet(fd, flag_buf, pkt_size, daddr);
- continue;
- }
- create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
- }
-}
-
-/* Test for data of same length, smaller than previous
- * and of different lengths
- */
-static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
- int payload_len1, int payload_len2)
-{
- static char buf[ETH_HLEN + IP_MAXPACKET];
-
- create_packet(buf, 0, 0, payload_len1, 0);
- write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
- create_packet(buf, payload_len1, 0, payload_len2, 0);
- write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
-}
-
-/* If incoming segments make tracked segment length exceed
- * legal IP datagram length, do not coalesce
- */
-static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
-{
- static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
- static char last[TOTAL_HDR_LEN + MSS];
- static char new_seg[TOTAL_HDR_LEN + MSS];
- int i;
-
- for (i = 0; i < NUM_LARGE_PKT; i++)
- create_packet(pkts[i], i * MSS, 0, MSS, 0);
- create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
- create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
-
- for (i = 0; i < NUM_LARGE_PKT; i++)
- write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
- write_packet(fd, last, total_hdr_len + remainder, daddr);
- write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
-}
-
-/* Pure acks and dup acks don't coalesce */
-static void send_ack(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN];
-
- create_packet(buf, 0, 0, 0, 0);
- write_packet(fd, buf, total_hdr_len, daddr);
- write_packet(fd, buf, total_hdr_len, daddr);
- create_packet(buf, 0, 1, 0, 0);
- write_packet(fd, buf, total_hdr_len, daddr);
-}
-
-static void recompute_packet(char *buf, char *no_ext, int extlen)
-{
- struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
-
- memmove(buf, no_ext, total_hdr_len);
- memmove(buf + total_hdr_len + extlen,
- no_ext + total_hdr_len, PAYLOAD_LEN);
-
- tcphdr->doff = tcphdr->doff + (extlen / 4);
- tcphdr->check = 0;
- tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
- if (proto == PF_INET) {
- iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- } else {
- ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
- }
-}
-
-static void tcp_write_options(char *buf, int kind, int ts)
-{
- struct tcp_option_ts {
- uint8_t kind;
- uint8_t len;
- uint32_t tsval;
- uint32_t tsecr;
- } *opt_ts = (void *)buf;
- struct tcp_option_window {
- uint8_t kind;
- uint8_t len;
- uint8_t shift;
- } *opt_window = (void *)buf;
-
- switch (kind) {
- case TCPOPT_NOP:
- buf[0] = TCPOPT_NOP;
- break;
- case TCPOPT_WINDOW:
- memset(opt_window, 0, sizeof(struct tcp_option_window));
- opt_window->kind = TCPOPT_WINDOW;
- opt_window->len = TCPOLEN_WINDOW;
- opt_window->shift = 0;
- break;
- case TCPOPT_TIMESTAMP:
- memset(opt_ts, 0, sizeof(struct tcp_option_ts));
- opt_ts->kind = TCPOPT_TIMESTAMP;
- opt_ts->len = TCPOLEN_TIMESTAMP;
- opt_ts->tsval = ts;
- opt_ts->tsecr = 0;
- break;
- default:
- error(1, 0, "unimplemented TCP option");
- break;
- }
-}
-
-/* TCP with options is always a permutation of {TS, NOP, NOP}.
- * Implement different orders to verify coalescing stops.
- */
-static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
-{
- switch (order) {
- case 0:
- tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
- TCPOPT_TIMESTAMP, ts);
- break;
- case 1:
- tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + 1,
- TCPOPT_TIMESTAMP, ts);
- tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
- TCPOPT_NOP, 0);
- break;
- case 2:
- tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
- tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
- TCPOPT_NOP, 0);
- tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
- TCPOPT_NOP, 0);
- break;
- default:
- error(1, 0, "unknown order");
- break;
- }
- recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
-}
-
-/* Packets with invalid checksum don't coalesce. */
-static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- tcph->check = tcph->check - 1;
- write_packet(fd, buf, pkt_size, daddr);
-}
-
- /* Packets with non-consecutive sequence number don't coalesce.*/
-static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- tcph->seq = ntohl(htonl(tcph->seq) + 1);
- tcph->check = 0;
- tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
- write_packet(fd, buf, pkt_size, daddr);
-}
-
- /* Packet with different timestamp option or different timestamps
- * don't coalesce.
- */
-static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
- int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 0, 0);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 0, 0);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 100, 0);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 100, 1);
- write_packet(fd, extpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt, buf, 100, 2);
- write_packet(fd, extpkt, pkt_size, daddr);
-}
-
-/* Packet with different tcp options don't coalesce. */
-static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
- static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
- int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
- int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt1, buf, 0, 0);
- write_packet(fd, extpkt1, extpkt1_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- add_standard_tcp_options(extpkt1, buf, 0, 0);
- write_packet(fd, extpkt1, extpkt1_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
- tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
- recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
- write_packet(fd, extpkt2, extpkt2_size, daddr);
-}
-
-static void add_ipv4_ts_option(void *buf, void *optpkt)
-{
- struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
- int optlen = sizeof(struct ip_timestamp);
- struct iphdr *iph;
-
- if (optlen % 4)
- error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
-
- ts->ipt_code = IPOPT_TS;
- ts->ipt_len = optlen;
- ts->ipt_ptr = 5;
- ts->ipt_flg = IPOPT_TS_TSONLY;
-
- memcpy(optpkt, buf, tcp_offset);
- memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
- sizeof(struct tcphdr) + PAYLOAD_LEN);
-
- iph = (struct iphdr *)(optpkt + ETH_HLEN);
- iph->ihl = 5 + (optlen / 4);
- iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
-}
-
-static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
-{
- struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
- struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
- char *exthdr_payload_start = (char *)(exthdr + 1);
-
- exthdr->hdrlen = 0;
- exthdr->nexthdr = IPPROTO_TCP;
-
- memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
-
- memcpy(optpkt, buf, tcp_offset);
- memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
- sizeof(struct tcphdr) + PAYLOAD_LEN);
-
- iph->nexthdr = exthdr_type;
- iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
-}
-
-static void fix_ip4_checksum(struct iphdr *iph)
-{
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
-}
-
-static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
-{
- static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
- static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
- bool send_three = false;
- struct iphdr *iph1;
- struct iphdr *iph2;
- struct iphdr *iph3;
-
- iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
- iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
- iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
-
- create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
- create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
-
- switch (tcase) {
- case 0: /* DF=1, Incrementing - should coalesce */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(9);
- break;
-
- case 1: /* DF=1, Fixed - should coalesce */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(8);
- break;
-
- case 2: /* DF=0, Incrementing - should coalesce */
- iph1->frag_off &= ~htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off &= ~htons(IP_DF);
- iph2->id = htons(9);
- break;
-
- case 3: /* DF=0, Fixed - should not coalesce */
- iph1->frag_off &= ~htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off &= ~htons(IP_DF);
- iph2->id = htons(8);
- break;
-
- case 4: /* DF=1, two packets incrementing, and one fixed - should
- * coalesce only the first two packets
- */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(9);
-
- iph3->frag_off |= htons(IP_DF);
- iph3->id = htons(9);
- send_three = true;
- break;
-
- case 5: /* DF=1, two packets fixed, and one incrementing - should
- * coalesce only the first two packets
- */
- iph1->frag_off |= htons(IP_DF);
- iph1->id = htons(8);
-
- iph2->frag_off |= htons(IP_DF);
- iph2->id = htons(8);
-
- iph3->frag_off |= htons(IP_DF);
- iph3->id = htons(9);
- send_three = true;
- break;
- }
-
- fix_ip4_checksum(iph1);
- fix_ip4_checksum(iph2);
- write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
- write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
-
- if (send_three) {
- fix_ip4_checksum(iph3);
- write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
- }
-}
-
-static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
-{
- for (int i = 0; i < num_flush_id_cases; i++) {
- sleep(1);
- send_flush_id_case(fd, daddr, i);
- sleep(1);
- write_packet(fd, fin_pkt, total_hdr_len, daddr);
- }
-}
-
-static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
- write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
- add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
- write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
-}
-
-/* IPv4 options shouldn't coalesce */
-static void send_ip_options(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
- int optlen = sizeof(struct ip_timestamp);
- int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
- add_ipv4_ts_option(buf, optpkt);
- write_packet(fd, optpkt, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
-}
-
-/* IPv4 fragments shouldn't coalesce */
-static void send_fragment4(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[IP_MAXPACKET];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- /* Once fragmented, packet would retain the total_len.
- * Tcp header is prepared as if rest of data is in follow-up frags,
- * but follow up frags aren't actually sent.
- */
- memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
- fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
- fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
- fill_datalinklayer(buf);
-
- iph->frag_off = htons(0x6000); // DF = 1, MF = 1
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* IPv4 packets with different ttl don't coalesce.*/
-static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
-{
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- iph->ttl = 7;
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* Packets with different tos don't coalesce.*/
-static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
-{
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- if (proto == PF_INET) {
- iph->tos = 1;
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- } else if (proto == PF_INET6) {
- ip6h->priority = 0xf;
- }
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* Packets with different ECN don't coalesce.*/
-static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
-{
- int pkt_size = total_hdr_len + PAYLOAD_LEN;
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
-
- create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, pkt_size, daddr);
-
- create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
- if (proto == PF_INET) {
- buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
- iph->check = 0;
- iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
- } else {
- buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
- }
- write_packet(fd, buf, pkt_size, daddr);
-}
-
-/* IPv6 fragments and packets with extensions don't coalesce.*/
-static void send_fragment6(int fd, struct sockaddr_ll *daddr)
-{
- static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
- static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
- sizeof(struct ip6_frag)];
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
- struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
- int extlen = sizeof(struct ip6_frag);
- int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
- int extpkt_len = bufpkt_len + extlen;
- int i;
-
- for (i = 0; i < 2; i++) {
- create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, bufpkt_len, daddr);
- }
- sleep(1);
- create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
- memset(extpkt, 0, extpkt_len);
-
- ip6h->nexthdr = IPPROTO_FRAGMENT;
- ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
- frag->ip6f_nxt = IPPROTO_TCP;
-
- memcpy(extpkt, buf, tcp_offset);
- memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
- sizeof(struct tcphdr) + PAYLOAD_LEN);
- write_packet(fd, extpkt, extpkt_len, daddr);
-
- create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
- write_packet(fd, buf, bufpkt_len, daddr);
-}
-
-static void bind_packetsocket(int fd)
-{
- struct sockaddr_ll daddr = {};
-
- daddr.sll_family = AF_PACKET;
- daddr.sll_protocol = ethhdr_proto;
- daddr.sll_ifindex = if_nametoindex(ifname);
- if (daddr.sll_ifindex == 0)
- error(1, errno, "if_nametoindex");
-
- if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
- error(1, errno, "could not bind socket");
-}
-
-static void set_timeout(int fd)
-{
- struct timeval timeout;
-
- timeout.tv_sec = 3;
- timeout.tv_usec = 0;
- if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
- sizeof(timeout)) < 0)
- error(1, errno, "cannot set timeout, setsockopt failed");
-}
-
-static void check_recv_pkts(int fd, int *correct_payload,
- int correct_num_pkts)
-{
- static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
- struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
- struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
- struct tcphdr *tcph;
- bool bad_packet = false;
- int tcp_ext_len = 0;
- int ip_ext_len = 0;
- int pkt_size = -1;
- int data_len = 0;
- int num_pkt = 0;
- int i;
-
- vlog("Expected {");
- for (i = 0; i < correct_num_pkts; i++)
- vlog("%d ", correct_payload[i]);
- vlog("}, Total %d packets\nReceived {", correct_num_pkts);
-
- while (1) {
- ip_ext_len = 0;
- pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
- if (pkt_size < 0)
- error(1, errno, "could not receive");
-
- if (iph->version == 4)
- ip_ext_len = (iph->ihl - 5) * 4;
- else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
- ip_ext_len = MIN_EXTHDR_SIZE;
-
- tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
-
- if (tcph->fin)
- break;
-
- tcp_ext_len = (tcph->doff - 5) * 4;
- data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
- /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
- * Ipv4/tcp packets without at least 6 bytes of data will be padded.
- * Packet sockets are protocol agnostic, and will not trim the padding.
- */
- if (pkt_size == ETH_ZLEN && iph->version == 4) {
- data_len = ntohs(iph->tot_len)
- - sizeof(struct tcphdr) - sizeof(struct iphdr);
- }
- vlog("%d ", data_len);
- if (data_len != correct_payload[num_pkt]) {
- vlog("[!=%d]", correct_payload[num_pkt]);
- bad_packet = true;
- }
- num_pkt++;
- }
- vlog("}, Total %d packets.\n", num_pkt);
- if (num_pkt != correct_num_pkts)
- error(1, 0, "incorrect number of packets");
- if (bad_packet)
- error(1, 0, "incorrect packet geometry");
-
- printf("Test succeeded\n\n");
-}
-
-static void gro_sender(void)
-{
- static char fin_pkt[MAX_HDR_LEN];
- struct sockaddr_ll daddr = {};
- int txfd = -1;
-
- txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
- if (txfd < 0)
- error(1, errno, "socket creation");
-
- memset(&daddr, 0, sizeof(daddr));
- daddr.sll_ifindex = if_nametoindex(ifname);
- if (daddr.sll_ifindex == 0)
- error(1, errno, "if_nametoindex");
- daddr.sll_family = AF_PACKET;
- memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
- daddr.sll_halen = ETH_ALEN;
- create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
-
- if (strcmp(testname, "data") == 0) {
- send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "ack") == 0) {
- send_ack(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "flags") == 0) {
- send_flags(txfd, &daddr, 1, 0, 0, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 1, 0, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 1, 0);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_flags(txfd, &daddr, 0, 0, 0, 1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "tcp") == 0) {
- send_changed_checksum(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_changed_seq(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_changed_ts(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_diff_opt(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else if (strcmp(testname, "ip") == 0) {
- send_changed_ECN(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_changed_tos(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- if (proto == PF_INET) {
- /* Modified packets may be received out of order.
- * Sleep function added to enforce test boundaries
- * so that fin pkts are not received prior to other pkts.
- */
- sleep(1);
- send_changed_ttl(txfd, &daddr);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_ip_options(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- send_fragment4(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- test_flush_id(txfd, &daddr, fin_pkt);
- } else if (proto == PF_INET6) {
- sleep(1);
- send_fragment6(txfd, &daddr);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with same payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- sleep(1);
- /* send IPv6 packets with ext header with different payload */
- send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
- sleep(1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- }
- } else if (strcmp(testname, "large") == 0) {
- /* 20 is the difference between min iphdr size
- * and min ipv6hdr size. Like MAX_HDR_SIZE,
- * MAX_PAYLOAD is defined with the larger header of the two.
- */
- int offset = proto == PF_INET ? 20 : 0;
- int remainder = (MAX_PAYLOAD + offset) % MSS;
-
- send_large(txfd, &daddr, remainder);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
-
- send_large(txfd, &daddr, remainder + 1);
- write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
- } else {
- error(1, 0, "Unknown testcase");
- }
-
- if (close(txfd))
- error(1, errno, "socket close");
-}
-
-static void gro_receiver(void)
-{
- static int correct_payload[NUM_PACKETS];
- int rxfd = -1;
-
- rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
- if (rxfd < 0)
- error(1, 0, "socket creation");
- setup_sock_filter(rxfd);
- set_timeout(rxfd);
- bind_packetsocket(rxfd);
-
- memset(correct_payload, 0, sizeof(correct_payload));
-
- if (strcmp(testname, "data") == 0) {
- printf("pure data packet of same size: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("large data packets followed by a smaller one: ");
- correct_payload[0] = PAYLOAD_LEN * 1.5;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("small data packets followed by a larger one: ");
- correct_payload[0] = PAYLOAD_LEN / 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (strcmp(testname, "ack") == 0) {
- printf("duplicate ack and pure ack: ");
- check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "flags") == 0) {
- correct_payload[0] = PAYLOAD_LEN * 3;
- correct_payload[1] = PAYLOAD_LEN * 2;
-
- printf("psh flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = 0;
- correct_payload[2] = PAYLOAD_LEN * 2;
- printf("syn flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("rst flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("urg flag ends coalescing: ");
- check_recv_pkts(rxfd, correct_payload, 3);
- } else if (strcmp(testname, "tcp") == 0) {
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- correct_payload[3] = PAYLOAD_LEN;
-
- printf("changed checksum does not coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("Wrong Seq number doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("Different timestamp doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 4);
-
- printf("Different options doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (strcmp(testname, "ip") == 0) {
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
-
- printf("different ECN doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("different tos doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- if (proto == PF_INET) {
- printf("different ttl doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("ip options doesn't coalesce: ");
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("fragmented ip4 doesn't coalesce: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- /* is_atomic checks */
- printf("DF=1, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=1, Fixed - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Incrementing - should coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("DF=0, Fixed - should not coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
-
- printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- } else if (proto == PF_INET6) {
- /* GRO doesn't check for ipv6 hop limit when flushing.
- * Hence no corresponding test to the ipv4 case.
- */
- printf("fragmented ip6 doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- correct_payload[1] = PAYLOAD_LEN;
- correct_payload[2] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 3);
-
- printf("ipv6 with ext header does coalesce: ");
- correct_payload[0] = PAYLOAD_LEN * 2;
- check_recv_pkts(rxfd, correct_payload, 1);
-
- printf("ipv6 with ext header with different payloads doesn't coalesce: ");
- correct_payload[0] = PAYLOAD_LEN;
- correct_payload[1] = PAYLOAD_LEN;
- check_recv_pkts(rxfd, correct_payload, 2);
- }
- } else if (strcmp(testname, "large") == 0) {
- int offset = proto == PF_INET ? 20 : 0;
- int remainder = (MAX_PAYLOAD + offset) % MSS;
-
- correct_payload[0] = (MAX_PAYLOAD + offset);
- correct_payload[1] = remainder;
- printf("Shouldn't coalesce if exceed IP max pkt size: ");
- check_recv_pkts(rxfd, correct_payload, 2);
-
- /* last segment sent individually, doesn't start new segment */
- correct_payload[0] = correct_payload[0] - remainder;
- correct_payload[1] = remainder + 1;
- correct_payload[2] = remainder + 1;
- check_recv_pkts(rxfd, correct_payload, 3);
- } else {
- error(1, 0, "Test case error, should never trigger");
- }
-
- if (close(rxfd))
- error(1, 0, "socket close");
-}
-
-static void parse_args(int argc, char **argv)
-{
- static const struct option opts[] = {
- { "daddr", required_argument, NULL, 'd' },
- { "dmac", required_argument, NULL, 'D' },
- { "iface", required_argument, NULL, 'i' },
- { "ipv4", no_argument, NULL, '4' },
- { "ipv6", no_argument, NULL, '6' },
- { "rx", no_argument, NULL, 'r' },
- { "saddr", required_argument, NULL, 's' },
- { "smac", required_argument, NULL, 'S' },
- { "test", required_argument, NULL, 't' },
- { "verbose", no_argument, NULL, 'v' },
- { 0, 0, 0, 0 }
- };
- int c;
-
- while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
- switch (c) {
- case '4':
- proto = PF_INET;
- ethhdr_proto = htons(ETH_P_IP);
- break;
- case '6':
- proto = PF_INET6;
- ethhdr_proto = htons(ETH_P_IPV6);
- break;
- case 'd':
- addr4_dst = addr6_dst = optarg;
- break;
- case 'D':
- dmac = optarg;
- break;
- case 'i':
- ifname = optarg;
- break;
- case 'r':
- tx_socket = false;
- break;
- case 's':
- addr4_src = addr6_src = optarg;
- break;
- case 'S':
- smac = optarg;
- break;
- case 't':
- testname = optarg;
- break;
- case 'v':
- verbose = true;
- break;
- default:
- error(1, 0, "%s invalid option %c\n", __func__, c);
- break;
- }
- }
-}
-
-int main(int argc, char **argv)
-{
- parse_args(argc, argv);
-
- if (proto == PF_INET) {
- tcp_offset = ETH_HLEN + sizeof(struct iphdr);
- total_hdr_len = tcp_offset + sizeof(struct tcphdr);
- } else if (proto == PF_INET6) {
- tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
- total_hdr_len = MAX_HDR_LEN;
- } else {
- error(1, 0, "Protocol family is not ipv4 or ipv6");
- }
-
- read_MAC(src_mac, smac);
- read_MAC(dst_mac, dmac);
-
- if (tx_socket)
- gro_sender();
- else
- gro_receiver();
-
- fprintf(stderr, "Gro::%s test passed.\n", testname);
- return 0;
-}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
deleted file mode 100755
index 02c21ff4ca81..000000000000
--- a/tools/testing/selftests/net/gro.sh
+++ /dev/null
@@ -1,104 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly SERVER_MAC="aa:00:00:00:00:02"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
-readonly PROTOS=("ipv4" "ipv6")
-dev=""
-test="all"
-proto="ipv4"
-
-run_test() {
- local server_pid=0
- local exit_code=0
- local protocol=$1
- local test=$2
- local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
- "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
-
- setup_ns
- # Each test is run 3 times to deflake, because given the receive timing,
- # not all packets that should coalesce will be considered in the same flow
- # on every try.
- for tries in {1..3}; do
- # Actual test starts here
- ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
- 1>>log.txt &
- server_pid=$!
- sleep 0.5 # to allow for socket init
- ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
- 1>>log.txt
- wait "${server_pid}"
- exit_code=$?
- if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
- ${exit_code} -ne 0 ]]; then
- echo "Ignoring errors due to slow environment" 1>&2
- exit_code=0
- fi
- if [[ "${exit_code}" -eq 0 ]]; then
- break;
- fi
- done
- cleanup_ns
- echo ${exit_code}
-}
-
-run_all_tests() {
- local failed_tests=()
- for proto in "${PROTOS[@]}"; do
- for test in "${TESTS[@]}"; do
- echo "running test ${proto} ${test}" >&2
- exit_code=$(run_test $proto $test)
- if [[ "${exit_code}" -ne 0 ]]; then
- failed_tests+=("${proto}_${test}")
- fi;
- done;
- done
- if [[ ${#failed_tests[@]} -ne 0 ]]; then
- echo "failed tests: ${failed_tests[*]}. \
- Please see log.txt for more logs"
- exit 1
- else
- echo "All Tests Succeeded!"
- fi;
-}
-
-usage() {
- echo "Usage: $0 \
- [-i <DEV>] \
- [-t data|ack|flags|tcp|ip|large] \
- [-p <ipv4|ipv6>]" 1>&2;
- exit 1;
-}
-
-while getopts "i:t:p:" opt; do
- case "${opt}" in
- i)
- dev="${OPTARG}"
- ;;
- t)
- test="${OPTARG}"
- ;;
- p)
- proto="${OPTARG}"
- ;;
- *)
- usage
- ;;
- esac
-done
-
-if [ -n "$dev" ]; then
- source setup_loopback.sh
-else
- source setup_veth.sh
-fi
-
-setup
-trap cleanup EXIT
-if [[ "${test}" == "all" ]]; then
- run_all_tests
-else
- run_test "${proto}" "${test}"
-fi;
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 884cd2cc0681..4b6afc0fe9f8 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,7 +2,11 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_PROGS := \
+ hsr_ping.sh \
+ hsr_redbox.sh \
+# end of TEST_PROGS
+
TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 555a868743f0..205cc4d3d64b 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -1,6 +1,6 @@
+CONFIG_BRIDGE=y
+CONFIG_HSR=y
CONFIG_IPV6=y
CONFIG_NET_SCH_NETEM=m
-CONFIG_HSR=y
CONFIG_VETH=y
-CONFIG_BRIDGE=y
CONFIG_VLAN_8021Q=m
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index d6f0e449c029..b13c89a99ecb 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -178,8 +178,6 @@ setup()
else
ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
index 76e604e4810e..7bfeeb133705 100644
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol)
ret = io_uring_queue_init(512, &ring, 0);
if (ret)
- error(1, ret, "io_uring: queue init");
+ error(1, -ret, "io_uring: queue init");
iov.iov_base = payload;
iov.iov_len = cfg_payload_len;
ret = io_uring_register_buffers(&ring, &iov, 1);
if (ret)
- error(1, ret, "io_uring: buffer registration");
+ error(1, -ret, "io_uring: buffer registration");
tstop = gettimeofday_ms() + cfg_runtime_ms;
do {
@@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol)
ret = io_uring_submit(&ring);
if (ret != cfg_nr_reqs)
- error(1, ret, "submit");
+ error(1, -ret, "submit");
if (cfg_cork)
do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
for (i = 0; i < cfg_nr_reqs; i++) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
- error(1, ret, "wait cqe");
+ error(1, -ret, "wait cqe");
if (cqe->user_data != NONZC_TAG &&
cqe->user_data != ZC_TAG)
- error(1, -EINVAL, "invalid cqe->user_data");
+ error(1, EINVAL, "invalid cqe->user_data");
if (cqe->flags & IORING_CQE_F_NOTIF) {
if (cqe->flags & IORING_CQE_F_MORE)
- error(1, -EINVAL, "invalid notif flags");
+ error(1, EINVAL, "invalid notif flags");
if (compl_cqes <= 0)
- error(1, -EINVAL, "notification mismatch");
+ error(1, EINVAL, "notification mismatch");
compl_cqes--;
i--;
io_uring_cqe_seen(&ring);
@@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol)
}
if (cqe->flags & IORING_CQE_F_MORE) {
if (cqe->user_data != ZC_TAG)
- error(1, cqe->res, "unexpected F_MORE");
+ error(1, -cqe->res, "unexpected F_MORE");
compl_cqes++;
}
if (cqe->res >= 0) {
packets++;
bytes += cqe->res;
} else if (cqe->res != -EAGAIN) {
- error(1, cqe->res, "send failed");
+ error(1, -cqe->res, "send failed");
}
io_uring_cqe_seen(&ring);
}
@@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol)
while (compl_cqes) {
ret = io_uring_wait_cqe(&ring, &cqe);
if (ret)
- error(1, ret, "wait cqe");
+ error(1, -ret, "wait cqe");
if (cqe->flags & IORING_CQE_F_MORE)
- error(1, -EINVAL, "invalid notif flags");
+ error(1, EINVAL, "invalid notif flags");
if (!(cqe->flags & IORING_CQE_F_NOTIF))
- error(1, -EINVAL, "missing notif flag");
+ error(1, EINVAL, "missing notif flag");
io_uring_cqe_seen(&ring);
compl_cqes--;
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 29451d2244b7..e6834a6cfc8f 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -10,7 +10,7 @@
#include <fcntl.h>
#include <netinet/ip.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#ifndef IP_LOCAL_PORT_RANGE
#define IP_LOCAL_PORT_RANGE 51
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
index 6c6ad346eaa0..4ff746db1256 100755
--- a/tools/testing/selftests/net/ip_local_port_range.sh
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -2,4 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
./in_netns.sh \
- sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
+ sh -c 'sysctl -q -w net.mptcp.enabled=1 && \
+ sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \
+ ./ip_local_port_range'
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index be4a30a0d02a..0ccf484b1d9d 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -34,7 +34,7 @@
#include <time.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#define printk(fmt, ...) \
ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
@@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
attr->rta_len = RTA_LENGTH(size);
attr->rta_type = rta_type;
- memcpy(RTA_DATA(attr), payload, size);
+ if (payload)
+ memcpy(RTA_DATA(attr), payload, size);
return 0;
}
diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh
new file mode 100755
index 000000000000..bf0243366caa
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test IPv6 force_forwarding interface property
+#
+# This test verifies that the force_forwarding property works correctly:
+# - When global forwarding is disabled, packets are not forwarded normally
+# - When force_forwarding is enabled on an interface, packets are forwarded
+# regardless of the global forwarding setting
+
+source lib.sh
+
+cleanup() {
+ cleanup_ns $ns1 $ns2 $ns3
+}
+
+trap cleanup EXIT
+
+setup_test() {
+ # Create three namespaces: sender, router, receiver
+ setup_ns ns1 ns2 ns3
+
+ # Create veth pairs: ns1 <-> ns2 <-> ns3
+ ip link add name veth12 type veth peer name veth21
+ ip link add name veth23 type veth peer name veth32
+
+ # Move interfaces to namespaces
+ ip link set veth12 netns $ns1
+ ip link set veth21 netns $ns2
+ ip link set veth23 netns $ns2
+ ip link set veth32 netns $ns3
+
+ # Configure interfaces
+ ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad
+ ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad
+ ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad
+ ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad
+
+ # Bring up interfaces
+ ip -n $ns1 link set veth12 up
+ ip -n $ns2 link set veth21 up
+ ip -n $ns2 link set veth23 up
+ ip -n $ns3 link set veth32 up
+
+ # Add routes
+ ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1
+
+ # Disable global forwarding
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0
+}
+
+test_force_forwarding() {
+ local ret=0
+
+ echo "TEST: force_forwarding functionality"
+
+ # Check if force_forwarding sysctl exists
+ if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then
+ echo "SKIP: force_forwarding not available"
+ return $ksft_skip
+ fi
+
+ # Test 1: Without force_forwarding, ping should fail
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "FAIL: ping succeeded when forwarding disabled"
+ ret=1
+ else
+ echo "PASS: forwarding disabled correctly"
+ fi
+
+ # Test 2: With force_forwarding enabled, ping should succeed
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1
+
+ if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then
+ echo "PASS: force_forwarding enabled forwarding"
+ else
+ echo "FAIL: ping failed with force_forwarding enabled"
+ ret=1
+ fi
+
+ return $ret
+}
+
+echo "IPv6 force_forwarding test"
+echo "=========================="
+
+setup_test
+test_force_forwarding
+ret=$?
+
+if [ $ret -eq 0 ]; then
+ echo "OK"
+ exit 0
+elif [ $ret -eq $ksft_skip ]; then
+ echo "SKIP"
+ exit $ksft_skip
+else
+ echo "FAIL"
+ exit 1
+fi
diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c
new file mode 100644
index 000000000000..672c9fe086a7
--- /dev/null
+++ b/tools/testing/selftests/net/ipv6_fragmentation.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: Brett A C Sheffield <bacs@librecast.net>
+ *
+ * Kernel selftest for the IPv6 fragmentation regression which affected stable
+ * kernels:
+ *
+ * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com
+ *
+ * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable
+ * without some prerequisite commits.
+ *
+ * This caused a regression when sending IPv6 UDP packets by preventing
+ * fragmentation and instead returning -1 (EMSGSIZE).
+ *
+ * This selftest demonstrates the issue by sending an IPv6 UDP packet to
+ * localhost (::1) on the loopback interface from the autoconfigured link-local
+ * address.
+ *
+ * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1
+ * (EMSGSIZE) when the regression is present.
+ *
+ * The regression was not present in the mainline kernel, but add this test to
+ * catch similar breakage in future.
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include "kselftest.h"
+
+#define MTU 1500
+#define LARGER_THAN_MTU 8192
+
+static void setup(void)
+{
+ struct ifreq ifr = {
+ .ifr_name = "lo"
+ };
+ int ctl;
+
+ /* we need to set MTU, so do this in a namespace to play nicely */
+ if (unshare(CLONE_NEWNET) == -1)
+ error(KSFT_FAIL, errno, "unshare");
+
+ ctl = socket(AF_LOCAL, SOCK_STREAM, 0);
+ if (ctl == -1)
+ error(KSFT_FAIL, errno, "socket");
+
+ /* ensure MTU is smaller than what we plan to send */
+ ifr.ifr_mtu = MTU;
+ if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1)
+ error(KSFT_FAIL, errno, "ioctl: set MTU");
+
+ /* bring up interface */
+ if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1)
+ error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS");
+ ifr.ifr_flags = ifr.ifr_flags | IFF_UP;
+ if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1)
+ error(KSFT_FAIL, errno, "ioctl: bring interface up");
+
+ if (close(ctl) == -1)
+ error(KSFT_FAIL, errno, "close");
+}
+
+int main(void)
+{
+ struct in6_addr addr = {
+ .s6_addr[15] = 0x01, /* ::1 */
+ };
+ struct sockaddr_in6 sa = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = addr,
+ .sin6_port = htons(9) /* port 9/udp (DISCARD) */
+ };
+ static char buf[LARGER_THAN_MTU] = {0};
+ struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_name = (struct sockaddr *)&sa,
+ .msg_namelen = sizeof(sa),
+ };
+ ssize_t rc;
+ int s;
+
+ printf("Testing IPv6 fragmentation\n");
+ setup();
+ s = socket(AF_INET6, SOCK_DGRAM, 0);
+send_again:
+ rc = sendmsg(s, &msg, 0);
+ if (rc == -1) {
+ /* if interface wasn't ready, try again */
+ if (errno == EADDRNOTAVAIL) {
+ usleep(1000);
+ goto send_again;
+ }
+ error(KSFT_FAIL, errno, "sendmsg");
+ } else if (rc != LARGER_THAN_MTU) {
+ error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i",
+ rc, LARGER_THAN_MTU);
+ }
+ printf("[PASS] sendmsg() returned %zi\n", rc);
+ if (close(s) == -1)
+ error(KSFT_FAIL, errno, "close");
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
index a6b2b1f9c641..c6866e42f95c 100755
--- a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
+++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh
@@ -69,7 +69,6 @@
# which can affect the conditions needed to trigger a soft lockup.
source lib.sh
-source net_helper.sh
TEST_DURATION=300
ROUTING_TABLE_REFRESH_PERIOD=0.01
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 8994fec1c38f..f448bafb3f20 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -43,7 +43,7 @@ __ksft_status_merge()
weights[$i]=$((weight++))
done
- if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+ if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then
echo "$a"
return 0
else
@@ -217,11 +217,61 @@ setup_ns()
return $ksft_skip
fi
ip -n "${!ns_name}" link set lo up
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ns_list+=("${!ns_name}")
done
NS_LIST+=("${ns_list[@]}")
}
+# Create netdevsim with given id and net namespace.
+create_netdevsim() {
+ local id="$1"
+ local ns="$2"
+
+ modprobe netdevsim &> /dev/null
+ udevadm settle
+
+ echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null
+ local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net)
+ ip -netns $ns link set dev $dev name nsim$id
+ ip -netns $ns link set dev nsim$id up
+
+ echo nsim$id
+}
+
+create_netdevsim_port() {
+ local nsim_id="$1"
+ local ns="$2"
+ local port_id="$3"
+ local perm_addr="$4"
+ local orig_dev
+ local new_dev
+ local nsim_path
+
+ nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id"
+
+ echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1
+
+ orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1)
+ orig_dev=$(basename "$orig_dev")
+ new_dev="nsim${nsim_id}p$port_id"
+
+ ip -netns "$ns" link set dev "$orig_dev" name "$new_dev"
+ ip -netns "$ns" link set dev "$new_dev" up
+
+ echo "$new_dev"
+}
+
+# Remove netdevsim with given id.
+cleanup_netdevsim() {
+ local id="$1"
+
+ if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then
+ echo "$id" > /sys/bus/netdevsim/del_device
+ fi
+}
+
tc_rule_stats_get()
{
local dev=$1; shift
@@ -245,6 +295,30 @@ tc_rule_handle_stats_get()
.options.actions[0].stats$selector"
}
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+ local -r ns=$1
+ local -r ipver=$2
+ local -r dev=$3
+ local -r flower_expr=$4
+
+ tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+ priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+ tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+ tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+
+ tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+ flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {
+ local -r ns=$1
+ local -r dev=$2
+
+ tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets
+}
+
ret_set_ksft_status()
{
local ksft_status=$1; shift
@@ -261,7 +335,7 @@ log_test_result()
local test_name=$1; shift
local opt_str=$1; shift
local result=$1; shift
- local retmsg=$1; shift
+ local retmsg=$1
printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
if [[ $retmsg ]]; then
@@ -435,6 +509,13 @@ xfail_on_veth()
fi
}
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
kill_process()
{
local pid=$1; shift
@@ -443,19 +524,148 @@ kill_process()
{ kill $pid && wait $pid; } 2>/dev/null
}
-ip_link_add()
+check_command()
+{
+ local cmd=$1; shift
+
+ if [[ ! -x "$(command -v "$cmd")" ]]; then
+ log_test_skip "$cmd not installed"
+ return $EXIT_STATUS
+ fi
+}
+
+require_command()
+{
+ local cmd=$1; shift
+
+ if ! check_command "$cmd"; then
+ exit $EXIT_STATUS
+ fi
+}
+
+adf_ip_link_add()
{
local name=$1; shift
- ip link add name "$name" "$@"
- defer ip link del dev "$name"
+ ip link add name "$name" "$@" && \
+ defer ip link del dev "$name"
}
-ip_link_master()
+adf_ip_link_set_master()
{
local member=$1; shift
local master=$1; shift
- ip link set dev "$member" master "$master"
- defer ip link set dev "$member" nomaster
+ ip link set dev "$member" master "$master" && \
+ defer ip link set dev "$member" nomaster
+}
+
+adf_ip_link_set_addr()
+{
+ local name=$1; shift
+ local addr=$1; shift
+
+ local old_addr=$(mac_get "$name")
+ ip link set dev "$name" address "$addr" && \
+ defer ip link set dev "$name" address "$old_addr"
+}
+
+ip_link_has_flag()
+{
+ local name=$1; shift
+ local flag=$1; shift
+
+ local state=$(ip -j link show "$name" |
+ jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)')
+ [[ $state == true ]]
+}
+
+ip_link_is_up()
+{
+ ip_link_has_flag "$1" UP
+}
+
+adf_ip_link_set_up()
+{
+ local name=$1; shift
+
+ if ! ip_link_is_up "$name"; then
+ ip link set dev "$name" up && \
+ defer ip link set dev "$name" down
+ fi
+}
+
+adf_ip_link_set_down()
+{
+ local name=$1; shift
+
+ if ip_link_is_up "$name"; then
+ ip link set dev "$name" down && \
+ defer ip link set dev "$name" up
+ fi
+}
+
+adf_ip_addr_add()
+{
+ local name=$1; shift
+
+ ip addr add dev "$name" "$@" && \
+ defer ip addr del dev "$name" "$@"
+}
+
+adf_ip_route_add()
+{
+ ip route add "$@" && \
+ defer ip route del "$@"
+}
+
+adf_bridge_vlan_add()
+{
+ bridge vlan add "$@" && \
+ defer bridge vlan del "$@"
+}
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
+
+cmd_jq()
+{
+ local cmd=$1
+ local jq_exp=$2
+ local jq_opts=$3
+ local ret
+ local output
+
+ output="$($cmd)"
+ # it the command fails, return error right away
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ output=$(echo $output | jq -r $jq_opts "$jq_exp")
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ return $ret
+ fi
+ echo $output
+ # return success only in case of non-empty output
+ [ ! -z "$output" ]
}
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..bbc97d6bf556 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
csum
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index 18b9443454a9..5339f56329e1 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -1,15 +1,24 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../../
-TEST_FILES := ../../../../../Documentation/netlink/specs
-TEST_FILES += ../../../../net/ynl
+TEST_FILES := \
+ ../../../../net/ynl \
+ ../../../../../Documentation/netlink/specs \
+ ksft_setup_loopback.sh \
+# end of TEST_FILES
-TEST_GEN_FILES += csum
+TEST_GEN_FILES := \
+ $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \
+ csum \
+ xdp_helper \
+# end of TEST_GEN_FILES
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
include ../../lib.mk
+
+include ../bpf.mk
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..17dc34a612c6
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static inline void ksft_ready(void)
+{
+ const char msg[7] = "ready\n";
+ char *env_str;
+ int fd;
+
+ env_str = getenv("KSFT_READY_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ fd = STDOUT_FILENO;
+ }
+
+ write(fd, msg, sizeof(msg));
+ if (fd != STDOUT_FILENO)
+ close(fd);
+}
+
+static inline void ksft_wait(void)
+{
+ char *env_str;
+ char byte;
+ int fd;
+
+ env_str = getenv("KSFT_WAIT_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ /* Not running in KSFT env, wait for input from STDIN instead */
+ fd = STDIN_FILENO;
+ }
+
+ read(fd, &byte, sizeof(byte));
+ if (fd != STDIN_FILENO)
+ close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
new file mode 100755
index 000000000000..3defbb1919c5
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Setup script for running ksft tests over a real interface in loopback mode.
+# This scripts replaces the historical setup_loopback.sh. It puts
+# a (presumably) real hardware interface into loopback mode, creates macvlan
+# interfaces on top and places them in a network namespace for isolation.
+#
+# NETIF env variable must be exported to indicate the real target device.
+# Note that the test will override NETIF with one of the macvlans, the
+# actual ksft test will only see the macvlans.
+#
+# Example use:
+# export NETIF=eth0
+# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py
+
+if [ -z "$NETIF" ]; then
+ echo "Error: NETIF variable not set"
+ exit 1
+fi
+if ! [ -d "/sys/class/net/$NETIF" ]; then
+ echo "Error: Can't find $NETIF, invalid netdevice"
+ exit 1
+fi
+
+# Save original settings for cleanup
+readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs"
+FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")"
+readonly FLUSH_TIMEOUT
+HARD_IRQS="$(< "${IRQ_PATH}")"
+readonly HARD_IRQS
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+readonly SERVER_NS
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+readonly CLIENT_NS
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+# ksft expects addresses to communicate with remote
+export LOCAL_V6=2001:db8:1::1
+export REMOTE_V6=2001:db8:1::2
+
+cleanup() {
+ local exit_code=$?
+
+ echo "Cleaning up..."
+
+ # Remove macvlan interfaces and namespaces
+ ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true
+ ip netns del "${SERVER_NS}" 2>/dev/null || true
+ ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true
+ ip netns del "${CLIENT_NS}" 2>/dev/null || true
+
+ # Disable loopback
+ ethtool -K "${NETIF}" loopback off 2>/dev/null || true
+ sleep 1
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+
+ exit $exit_code
+}
+
+trap cleanup EXIT INT TERM
+
+# Enable loopback mode
+echo "Enabling loopback on ${NETIF}..."
+ethtool -K "${NETIF}" loopback on || {
+ echo "Failed to enable loopback mode"
+ exit 1
+}
+# The interface may need time to get carrier back, but selftests
+# will wait for carrier, so no need to wait / sleep here.
+
+# Use timer on host to trigger the network stack
+# Also disable device interrupt to not depend on NIC interrupt
+# Reduce test flakiness caused by unexpected interrupts
+echo 100000 >"${FLUSH_PATH}"
+echo 50 >"${IRQ_PATH}"
+
+# Create server namespace with macvlan
+ip netns add "${SERVER_NS}"
+ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan
+ip link set dev server netns "${SERVER_NS}"
+ip -netns "${SERVER_NS}" link set dev server up
+ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server
+ip -netns "${SERVER_NS}" link set dev lo up
+
+# Create client namespace with macvlan
+ip netns add "${CLIENT_NS}"
+ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan
+ip link set dev client netns "${CLIENT_NS}"
+ip -netns "${CLIENT_NS}" link set dev client up
+ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client
+ip -netns "${CLIENT_NS}" link set dev lo up
+
+echo "Setup complete!"
+echo " Device: ${NETIF}"
+echo " Server NS: ${SERVER_NS}"
+echo " Client NS: ${CLIENT_NS}"
+echo ""
+
+# Setup environment variables for tests
+export NETIF=server
+export REMOTE_TYPE=netns
+export REMOTE_ARGS="${CLIENT_NS}"
+
+# Run the command
+ip netns exec "${SERVER_NS}" "$@"
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
index 54d8f5eba810..40f9ce307dd1 100644
--- a/tools/testing/selftests/net/lib/py/__init__.py
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -1,9 +1,33 @@
# SPDX-License-Identifier: GPL-2.0
+"""
+Python selftest helpers for netdev.
+"""
+
from .consts import KSRC
-from .ksft import *
-from .netns import NetNS
-from .nsim import *
-from .utils import *
-from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
-from .ynl import NetshaperFamily
+from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \
+ ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \
+ ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \
+ ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \
+ ksft_variants, KsftNamedVariant
+from .netns import NetNS, NetNSEnter
+from .nsim import NetdevSim, NetdevSimDev
+from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \
+ bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily
+from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily
+
+__all__ = ["KSRC",
+ "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq",
+ "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in",
+ "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises",
+ "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup",
+ "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant",
+ "NetNS", "NetNSEnter",
+ "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer",
+ "bpftool", "ip", "ethtool", "bpftrace", "rand_port",
+ "wait_port_listen", "wait_file",
+ "NetdevSim", "NetdevSimDev",
+ "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError",
+ "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily",
+ "RtnlAddrFamily"]
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 477ae76de93d..531e7fa1b3ea 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -1,11 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
-import builtins
import functools
import inspect
+import signal
import sys
import time
import traceback
+from collections import namedtuple
from .consts import KSFT_MAIN_NAME
from .utils import global_defer_queue
@@ -26,7 +27,12 @@ class KsftXfailEx(Exception):
pass
+class KsftTerminate(KeyboardInterrupt):
+ pass
+
+
def ksft_pr(*objs, **kwargs):
+ kwargs["flush"] = True
print("#", *objs, **kwargs)
@@ -66,16 +72,36 @@ def ksft_true(a, comment=""):
_fail("Check failed", a, "does not eval to True", comment)
+def ksft_not_none(a, comment=""):
+ if a is None:
+ _fail("Check failed", a, "is None", comment)
+
+
def ksft_in(a, b, comment=""):
if a not in b:
_fail("Check failed", a, "not in", b, comment)
+def ksft_not_in(a, b, comment=""):
+ if a in b:
+ _fail("Check failed", a, "in", b, comment)
+
+
+def ksft_is(a, b, comment=""):
+ if a is not b:
+ _fail("Check failed", a, "is not", b, comment)
+
+
def ksft_ge(a, b, comment=""):
if a < b:
_fail("Check failed", a, "<", b, comment)
+def ksft_gt(a, b, comment=""):
+ if a <= b:
+ _fail("Check failed", a, "<=", b, comment)
+
+
def ksft_lt(a, b, comment=""):
if a >= b:
_fail("Check failed", a, ">=", b, comment)
@@ -110,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
time.sleep(sleep)
-def ktap_result(ok, cnt=1, case="", comment=""):
+def ktap_result(ok, cnt=1, case_name="", comment=""):
global KSFT_RESULT_ALL
KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
@@ -120,11 +146,11 @@ def ktap_result(ok, cnt=1, case="", comment=""):
res += "ok "
res += str(cnt) + " "
res += KSFT_MAIN_NAME
- if case:
- res += "." + str(case.__name__)
+ if case_name:
+ res += "." + case_name
if comment:
res += " # " + comment
- print(res)
+ print(res, flush=True)
def ksft_flush_defer():
@@ -137,7 +163,7 @@ def ksft_flush_defer():
entry = global_defer_queue.pop()
try:
entry.exec_only()
- except:
+ except Exception:
ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
tb = traceback.format_exc()
for line in tb.strip().split('\n'):
@@ -145,6 +171,10 @@ def ksft_flush_defer():
KSFT_RESULT = False
+KsftCaseFunction = namedtuple("KsftCaseFunction",
+ ['name', 'original_func', 'variants'])
+
+
def ksft_disruptive(func):
"""
Decorator that marks the test as disruptive (e.g. the test
@@ -155,11 +185,47 @@ def ksft_disruptive(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
if not KSFT_DISRUPTIVE:
- raise KsftSkipEx(f"marked as disruptive")
+ raise KsftSkipEx("marked as disruptive")
return func(*args, **kwargs)
return wrapper
+class KsftNamedVariant:
+ """ Named string name + argument list tuple for @ksft_variants """
+
+ def __init__(self, name, *params):
+ self.params = params
+ self.name = name or "_".join([str(x) for x in self.params])
+
+
+def ksft_variants(params):
+ """
+ Decorator defining the sets of inputs for a test.
+ The parameters will be included in the name of the resulting sub-case.
+ Parameters can be either single object, tuple or a KsftNamedVariant.
+ The argument can be a list or a generator.
+
+ Example:
+
+ @ksft_variants([
+ (1, "a"),
+ (2, "b"),
+ KsftNamedVariant("three", 3, "c"),
+ ])
+ def my_case(cfg, a, b):
+ pass # ...
+
+ ksft_run(cases=[my_case], args=(cfg, ))
+
+ Will generate cases:
+ my_case.1_a
+ my_case.2_b
+ my_case.three
+ """
+
+ return lambda func: KsftCaseFunction(func.__name__, func, params)
+
+
def ksft_setup(env):
"""
Setup test framework global state from the environment.
@@ -173,7 +239,7 @@ def ksft_setup(env):
return False
try:
return bool(int(value))
- except:
+ except Exception:
raise Exception(f"failed to parse {name}")
if "DISRUPTIVE" in env:
@@ -183,9 +249,24 @@ def ksft_setup(env):
return env
-def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+def _ksft_intr(signum, frame):
+ # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+ # if we don't ignore the second one it will stop us from handling cleanup
+ global term_cnt
+ term_cnt += 1
+ if term_cnt == 1:
+ raise KsftTerminate()
+ else:
+ ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
+def _ksft_generate_test_cases(cases, globs, case_pfx, args):
+ """Generate a flat list of (func, args, name) tuples"""
+
cases = cases or []
+ test_cases = []
+ # If using the globs method find all relevant functions
if globs and case_pfx:
for key, value in globs.items():
if not callable(value):
@@ -195,22 +276,47 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases.append(value)
break
+ for func in cases:
+ if isinstance(func, KsftCaseFunction):
+ # Parametrized test - create case for each param
+ for param in func.variants:
+ if not isinstance(param, KsftNamedVariant):
+ if not isinstance(param, tuple):
+ param = (param, )
+ param = KsftNamedVariant(None, *param)
+
+ test_cases.append((func.original_func,
+ (*args, *param.params),
+ func.name + "." + param.name))
+ else:
+ test_cases.append((func, args, func.__name__))
+
+ return test_cases
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args)
+
+ global term_cnt
+ term_cnt = 0
+ prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
- print("KTAP version 1")
- print("1.." + str(len(cases)))
+ print("TAP version 13", flush=True)
+ print("1.." + str(len(test_cases)), flush=True)
global KSFT_RESULT
cnt = 0
stop = False
- for case in cases:
+ for func, args, name in test_cases:
KSFT_RESULT = True
cnt += 1
comment = ""
cnt_key = ""
try:
- case(*args)
+ func(*args)
except KsftSkipEx as e:
comment = "SKIP " + str(e)
cnt_key = 'skip'
@@ -223,21 +329,37 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
for line in tb.strip().split('\n'):
ksft_pr("Exception|", line)
if stop:
- ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
- ksft_flush_defer()
+ try:
+ ksft_flush_defer()
+ except BaseException as e:
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ if isinstance(e, KeyboardInterrupt):
+ ksft_pr()
+ ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.")
+ ksft_pr(" Attempting to finish cleanup before exiting.")
+ ksft_pr(" Interrupt again to exit immediately.")
+ ksft_pr()
+ stop = True
+ # Flush was interrupted, try to finish the job best we can
+ ksft_flush_defer()
if not cnt_key:
cnt_key = 'pass' if KSFT_RESULT else 'fail'
- ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+ ktap_result(KSFT_RESULT, cnt, name, comment=comment)
totals[cnt_key] += 1
if stop:
break
+ signal.signal(signal.SIGTERM, prev_sigterm)
+
print(
f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
)
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
index ecff85f9074f..8e9317044eef 100644
--- a/tools/testing/selftests/net/lib/py/netns.py
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -1,9 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
from .utils import ip
+import ctypes
import random
import string
+libc = ctypes.cdll.LoadLibrary('libc.so.6')
+
class NetNS:
def __init__(self, name=None):
@@ -29,3 +32,18 @@ class NetNS:
def __repr__(self):
return f"NetNS({self.name})"
+
+
+class NetNSEnter:
+ def __init__(self, ns_name):
+ self.ns_path = f"/run/netns/{ns_name}"
+
+ def __enter__(self):
+ self.saved = open("/proc/thread-self/ns/net")
+ with open(self.ns_path) as ns_file:
+ libc.setns(ns_file.fileno(), 0)
+ return self
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ libc.setns(self.saved.fileno(), 0)
+ self.saved.close()
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
index 1a8cbe9acc48..7c640ed64c0b 100644
--- a/tools/testing/selftests/net/lib/py/nsim.py
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -27,7 +27,7 @@ class NetdevSim:
self.port_index = port_index
self.ns = ns
self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
- ret = ip("-j link show dev %s" % ifname, ns=ns)
+ ret = ip("-d -j link show dev %s" % ifname, ns=ns)
self.dev = json.loads(ret.stdout)[0]
self.ifindex = self.dev["ifindex"]
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 72590c3f90f1..106ee1f2df86 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -1,33 +1,80 @@
# SPDX-License-Identifier: GPL-2.0
-import errno
import json as _json
-import random
+import os
import re
+import select
import socket
import subprocess
import time
class CmdExitFailure(Exception):
- pass
+ def __init__(self, msg, cmd_obj):
+ super().__init__(msg)
+ self.cmd = cmd_obj
+
+
+def fd_read_timeout(fd, timeout):
+ rlist, _, _ = select.select([fd], [], [], timeout)
+ if rlist:
+ return os.read(fd, 1024)
+ raise TimeoutError("Timeout waiting for fd read")
class cmd:
- def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ """
+ Execute a command on local or remote host.
+
+ @shell defaults to false, and class will try to split @comm into a list
+ if it's a string with spaces.
+
+ Use bkg() instead to run a command in the background.
+ """
+ def __init__(self, comm, shell=None, fail=True, ns=None, background=False,
+ host=None, timeout=5, ksft_ready=None, ksft_wait=None):
if ns:
comm = f'ip netns exec {ns} ' + comm
self.stdout = None
self.stderr = None
self.ret = None
+ self.ksft_term_fd = None
self.comm = comm
if host:
self.proc = host.cmd(comm)
else:
+ # If user doesn't explicitly request shell try to avoid it.
+ if shell is None and isinstance(comm, str) and ' ' in comm:
+ comm = comm.split()
+
+ # ksft_wait lets us wait for the background process to fully start,
+ # we pass an FD to the child process, and wait for it to write back.
+ # Similarly term_fd tells child it's time to exit.
+ pass_fds = []
+ env = os.environ.copy()
+ if ksft_wait is not None:
+ wait_fd, self.ksft_term_fd = os.pipe()
+ pass_fds.append(wait_fd)
+ env["KSFT_WAIT_FD"] = str(wait_fd)
+ ksft_ready = True # ksft_wait implies ready
+ if ksft_ready is not None:
+ rfd, ready_fd = os.pipe()
+ pass_fds.append(ready_fd)
+ env["KSFT_READY_FD"] = str(ready_fd)
+
self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
- stderr=subprocess.PIPE)
+ stderr=subprocess.PIPE, pass_fds=pass_fds,
+ env=env)
+ if ksft_wait is not None:
+ os.close(wait_fd)
+ if ksft_ready is not None:
+ os.close(ready_fd)
+ msg = fd_read_timeout(rfd, ksft_wait)
+ os.close(rfd)
+ if not msg:
+ raise Exception("Did not receive ready message")
if not background:
self.process(terminate=False, fail=fail, timeout=timeout)
@@ -35,6 +82,8 @@ class cmd:
if fail is None:
fail = not terminate
+ if self.ksft_term_fd:
+ os.write(self.ksft_term_fd, b"1")
if terminate:
self.proc.terminate()
stdout, stderr = self.proc.communicate(timeout)
@@ -48,22 +97,48 @@ class cmd:
if len(stderr) > 0 and stderr[-1] == "\n":
stderr = stderr[:-1]
raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
- (self.proc.args, stdout, stderr))
+ (self.proc.args, stdout, stderr), self)
class bkg(cmd):
- def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
- exit_wait=False):
+ """
+ Run a command in the background.
+
+ Examples usage:
+
+ Run a command on remote host, and wait for it to finish.
+ This is usually paired with wait_port_listen() to make sure
+ the command has initialized:
+
+ with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc:
+ ...
+
+ Run a command and expect it to let us know that it's ready
+ by writing to a special file descriptor passed via KSFT_READY_FD.
+ Command will be terminated when we exit the context manager:
+
+ with bkg("my_binary", ksft_wait=5):
+ """
+ def __init__(self, comm, shell=None, fail=None, ns=None, host=None,
+ exit_wait=False, ksft_ready=None, ksft_wait=None):
super().__init__(comm, background=True,
- shell=shell, fail=fail, ns=ns, host=host)
- self.terminate = not exit_wait
+ shell=shell, fail=fail, ns=ns, host=host,
+ ksft_ready=ksft_ready, ksft_wait=ksft_wait)
+ self.terminate = not exit_wait and not ksft_wait
+ self._exit_wait = exit_wait
self.check_fail = fail
+ if shell and self.terminate:
+ print("# Warning: combining shell and terminate is risky!")
+ print("# SIGTERM may not reach the child on zsh/ksh!")
+
def __enter__(self):
return self
def __exit__(self, ex_type, ex_value, ex_tb):
- return self.process(terminate=self.terminate, fail=self.check_fail)
+ # Force termination on exception
+ terminate = self.terminate or (self._exit_wait and ex_type)
+ return self.process(terminate=terminate, fail=self.check_fail)
global_defer_queue = []
@@ -71,8 +146,6 @@ global_defer_queue = []
class defer:
def __init__(self, func, *args, **kwargs):
- global global_defer_queue
-
if not callable(func):
raise Exception("defer created with un-callable object, did you call the function instead of passing its name?")
@@ -111,6 +184,10 @@ def tool(name, args, json=None, ns=None, host=None):
return cmd_obj
+def bpftool(args, json=None, ns=None, host=None):
+ return tool('bpftool', args, json=json, ns=ns, host=host)
+
+
def ip(args, json=None, ns=None, host=None):
if ns:
args = f'-netns {ns} ' + args
@@ -121,20 +198,48 @@ def ethtool(args, json=None, ns=None, host=None):
return tool('ethtool', args, json=json, ns=ns, host=host)
-def rand_port():
+def bpftrace(expr, json=None, ns=None, host=None, timeout=None):
"""
- Get a random unprivileged port, try to make sure it's not already used.
+ Run bpftrace and return map data (if json=True).
+ The output of bpftrace is inconvenient, so the helper converts
+ to a dict indexed by map name, e.g.:
+ {
+ "@": { ... },
+ "@map2": { ... },
+ }
"""
- for _ in range(1000):
- port = random.randint(10000, 65535)
- try:
- with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
- s.bind(("", port))
- return port
- except OSError as e:
- if e.errno != errno.EADDRINUSE:
- raise
- raise Exception("Can't find any free unprivileged port")
+ cmd_arr = ['bpftrace']
+ # Throw in --quiet if json, otherwise the output has two objects
+ if json:
+ cmd_arr += ['-f', 'json', '-q']
+ if timeout:
+ expr += ' interval:s:' + str(timeout) + ' { exit(); }'
+ cmd_arr += ['-e', expr]
+ cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False)
+ if json:
+ # bpftrace prints objects as lines
+ ret = {}
+ for l in cmd_obj.stdout.split('\n'):
+ if not l.strip():
+ continue
+ one = _json.loads(l)
+ if one.get('type') != 'map':
+ continue
+ for k, v in one["data"].items():
+ if k.startswith('@'):
+ k = k.lstrip('@')
+ ret[k] = v
+ return ret
+ return cmd_obj
+
+
+def rand_port(stype=socket.SOCK_STREAM):
+ """
+ Get a random unprivileged port.
+ """
+ with socket.socket(socket.AF_INET6, stype) as s:
+ s.bind(("", 0))
+ return s.getsockname()[1]
def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
@@ -153,3 +258,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin
if time.monotonic() > end:
raise Exception("Waiting for port listen timed out")
time.sleep(sleep)
+
+
+def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'):
+ """
+ Wait for file contents on the local system to satisfy a condition.
+ test_fn() should take one argument (file contents) and return whether
+ condition is met.
+ """
+ end = time.monotonic() + deadline
+
+ with open(fname, "r", encoding=encoding) as fp:
+ while True:
+ if test_fn(fp.read()):
+ break
+ fp.seek(0)
+ if time.monotonic() > end:
+ raise TimeoutError("Wait for file contents failed", fname)
+ time.sleep(sleep)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index a0d689d58c57..32c223e93b2c 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -13,14 +13,14 @@ try:
SPEC_PATH = KSFT_DIR / "net/lib/specs"
sys.path.append(tools_full_path.as_posix())
- from net.lib.ynl.lib import YnlFamily, NlError
+ from net.lib.ynl.pyynl.lib import YnlFamily, NlError
else:
# Running in tree
tools_full_path = KSRC / "tools"
SPEC_PATH = KSRC / "Documentation/netlink/specs"
sys.path.append(tools_full_path.as_posix())
- from net.ynl.lib import YnlFamily, NlError
+ from net.ynl.pyynl.lib import YnlFamily, NlError
except ModuleNotFoundError as e:
ksft_pr("Failed importing `ynl` library from kernel sources")
ksft_pr(str(e))
@@ -32,23 +32,37 @@ except ModuleNotFoundError as e:
# Set schema='' to avoid jsonschema validation, it's slow
#
class EthtoolFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
class RtnlFamily(YnlFamily):
- def __init__(self):
- super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
- schema='')
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
+class RtnlAddrFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
class NetshaperFamily(YnlFamily):
- def __init__(self):
+ def __init__(self, recv_size=0):
super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(),
- schema='')
+ schema='', recv_size=recv_size)
+
+class DevlinkFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
+
+class PSPFamily(YnlFamily):
+ def __init__(self, recv_size=0):
+ super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(),
+ schema='', recv_size=recv_size)
diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh
index 082f5d38321b..47ab78c4d465 100644
--- a/tools/testing/selftests/net/lib/sh/defer.sh
+++ b/tools/testing/selftests/net/lib/sh/defer.sh
@@ -1,6 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Whether to pause and allow debugging when an executed deferred command has a
+# non-zero exit code.
+: "${DEFER_PAUSE_ON_FAIL:=no}"
+
# map[(scope_id,track,cleanup_id) -> cleanup_command]
# track={d=default | p=priority}
declare -A __DEFER__JOBS
@@ -38,8 +42,20 @@ __defer__run()
local track=$1; shift
local defer_ix=$1; shift
local defer_key=$(__defer__defer_key $track $defer_ix)
+ local ret
+
+ eval ${__DEFER__JOBS[$defer_key]}
+ ret=$?
+
+ if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then
+ echo "Deferred command (track $track index $defer_ix):"
+ echo " ${__DEFER__JOBS[$defer_key]}"
+ echo "... ended with an exit status of $ret"
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [[ "$a" == q ]] && exit 1
+ fi
- ${__DEFER__JOBS[$defer_key]}
unset __DEFER__JOBS[$defer_key]
}
@@ -49,7 +65,7 @@ __defer__schedule()
local ndefers=$(__defer__ndefers $track)
local ndefers_key=$(__defer__ndefer_key $track)
local defer_key=$(__defer__defer_key $track $ndefers)
- local defer="$@"
+ local defer="${@@Q}"
__DEFER__JOBS[$defer_key]="$defer"
__DEFER__NJOBS[$ndefers_key]=$((ndefers + 1))
diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
index d988b2e0cee8..e73fab3edd9f 100644
--- a/tools/testing/selftests/net/xdp_dummy.bpf.c
+++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c
@@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx)
return XDP_PASS;
}
+SEC("xdp.frags")
+int xdp_dummy_prog_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
new file mode 100644
index 000000000000..eb025a9f35b1
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <linux/if_xdp.h>
+#include <linux/if_link.h>
+#include <net/if.h>
+#include <inttypes.h>
+
+#include "ksft.h"
+
+#define UMEM_SZ (1U << 16)
+#define NUM_DESC (UMEM_SZ / 2048)
+
+
+static void print_usage(const char *bin)
+{
+ fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+ "where:\n\t-z: force zerocopy mode", bin);
+}
+
+/* this is a simple helper program that creates an XDP socket and does the
+ * minimum necessary to get bind() to succeed.
+ *
+ * this test program is not intended to actually process packets, but could be
+ * extended in the future if that is actually needed.
+ *
+ * it is used by queues.py to ensure the xsk netlinux attribute is set
+ * correctly.
+ */
+int main(int argc, char **argv)
+{
+ struct xdp_umem_reg umem_reg = { 0 };
+ struct sockaddr_xdp sxdp = { 0 };
+ int num_desc = NUM_DESC;
+ void *umem_area;
+ int retry = 0;
+ int ifindex;
+ int sock_fd;
+ int queue;
+
+ if (argc != 3 && argc != 4) {
+ print_usage(argv[0]);
+ return 1;
+ }
+
+ sock_fd = socket(AF_XDP, SOCK_RAW, 0);
+ if (sock_fd < 0) {
+ perror("socket creation failed");
+ /* if the kernel doesn't support AF_XDP, let the test program
+ * know with -1. All other error paths return 1.
+ */
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ /* "Probing mode", just checking if AF_XDP sockets are supported */
+ if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) {
+ printf("AF_XDP support detected\n");
+ close(sock_fd);
+ return 0;
+ }
+
+ ifindex = atoi(argv[1]);
+ queue = atoi(argv[2]);
+
+ umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS, -1, 0);
+ if (umem_area == MAP_FAILED) {
+ perror("mmap failed");
+ return 1;
+ }
+
+ umem_reg.addr = (uintptr_t)umem_area;
+ umem_reg.len = UMEM_SZ;
+ umem_reg.chunk_size = 2048;
+ umem_reg.headroom = 0;
+
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg,
+ sizeof(umem_reg));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc,
+ sizeof(num_desc));
+ setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc));
+
+ sxdp.sxdp_family = AF_XDP;
+ sxdp.sxdp_ifindex = ifindex;
+ sxdp.sxdp_queue_id = queue;
+ sxdp.sxdp_flags = 0;
+
+ if (argc > 3) {
+ if (!strcmp(argv[3], "-z")) {
+ sxdp.sxdp_flags = XDP_ZEROCOPY;
+ } else {
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ while (1) {
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+ break;
+
+ if (errno == EBUSY && retry < 3) {
+ retry++;
+ sleep(1);
+ continue;
+ } else {
+ perror("bind failed");
+ munmap(umem_area, UMEM_SZ);
+ close(sock_fd);
+ return 1;
+ }
+ }
+
+ ksft_ready();
+ ksft_wait();
+
+ /* parent program will write a byte to stdin when its ready for this
+ * helper to exit
+ */
+
+ close(sock_fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c
new file mode 100644
index 000000000000..64f05229ab24
--- /dev/null
+++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c
@@ -0,0 +1,680 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ADJST_OFFSET 256
+#define MAX_PAYLOAD_LEN 5000
+#define MAX_HDR_LEN 64
+
+extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak;
+
+enum {
+ XDP_MODE = 0,
+ XDP_PORT = 1,
+ XDP_ADJST_OFFSET = 2,
+ XDP_ADJST_TAG = 3,
+} xdp_map_setup_keys;
+
+enum {
+ XDP_MODE_PASS = 0,
+ XDP_MODE_DROP = 1,
+ XDP_MODE_TX = 2,
+ XDP_MODE_TAIL_ADJST = 3,
+ XDP_MODE_HEAD_ADJST = 4,
+} xdp_map_modes;
+
+enum {
+ STATS_RX = 0,
+ STATS_PASS = 1,
+ STATS_DROP = 2,
+ STATS_TX = 3,
+ STATS_ABORT = 4,
+} xdp_stats;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __s32);
+} map_xdp_setup SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_xdp_stats SEC(".maps");
+
+static __u32 min(__u32 a, __u32 b)
+{
+ return a < b ? a : b;
+}
+
+static void record_stats(struct xdp_md *ctx, __u32 stat_type)
+{
+ __u64 *count;
+
+ count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type);
+
+ if (count)
+ __sync_fetch_and_add(count, 1);
+}
+
+static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return NULL;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return NULL;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return NULL;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+ } else {
+ return NULL;
+ }
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ if (udph->dest != bpf_htons(port))
+ return NULL;
+
+ record_stats(ctx, STATS_RX);
+
+ return udph;
+}
+
+static int xdp_mode_pass(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_PASS);
+
+ return XDP_PASS;
+}
+
+static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_DROP);
+
+ return XDP_DROP;
+}
+
+static void swap_machdr(void *data)
+{
+ struct ethhdr *eth = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN);
+ __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN);
+}
+
+static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+ void *data, *data_end;
+ struct ethhdr *eth;
+ int err;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = eth = (void *)(long)ctx->data;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_PASS;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph;
+ __be32 tmp_ip;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ iph = data + sizeof(*eth);
+
+ if (iph + 1 > (struct iphdr *)data_end ||
+ iph->protocol != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*iph) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ eth = data;
+ swap_machdr((void *)eth);
+
+ tmp_ip = iph->saddr;
+ iph->saddr = iph->daddr;
+ iph->daddr = tmp_ip;
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct in6_addr tmp_ipv6;
+ struct ipv6hdr *ipv6h;
+
+ err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) +
+ sizeof(*udph));
+ if (err)
+ return XDP_PASS;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ ipv6h = data + sizeof(*eth);
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end ||
+ ipv6h->nexthdr != IPPROTO_UDP)
+ return XDP_PASS;
+
+ udph = data + sizeof(*ipv6h) + sizeof(*eth);
+
+ if (udph + 1 > (struct udphdr *)data_end)
+ return XDP_PASS;
+ if (udph->dest != bpf_htons(port))
+ return XDP_PASS;
+
+ record_stats(ctx, STATS_RX);
+ eth = data;
+ swap_machdr((void *)eth);
+
+ __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr,
+ sizeof(tmp_ipv6));
+ __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6));
+
+ record_stats(ctx, STATS_TX);
+
+ return XDP_TX;
+ }
+
+ return XDP_PASS;
+}
+
+static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct udphdr *udph = NULL;
+ struct ethhdr *eth = data;
+ __u32 len, len_new;
+
+ if (data + sizeof(*eth) > data_end)
+ return NULL;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = data + sizeof(*eth);
+ __u16 total_len;
+
+ if (iph + 1 > (struct iphdr *)data_end)
+ return NULL;
+
+ iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset);
+
+ udph = (void *)eth + sizeof(*iph) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct ipv6hdr *ipv6h = data + sizeof(*eth);
+ __u16 payload_len;
+
+ if (ipv6h + 1 > (struct ipv6hdr *)data_end)
+ return NULL;
+
+ udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth);
+ if (!udph || udph + 1 > (struct udphdr *)data_end)
+ return NULL;
+
+ *udp_csum = ~((__u32)udph->check);
+
+ len = ipv6h->payload_len;
+ len_new = bpf_htons(bpf_ntohs(len) + offset);
+ ipv6h->payload_len = len_new;
+
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+
+ len = udph->len;
+ len_new = bpf_htons(bpf_ntohs(udph->len) + offset);
+ *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new,
+ sizeof(len_new), *udp_csum);
+ } else {
+ return NULL;
+ }
+
+ udph->len = len_new;
+
+ return udph;
+}
+
+static __u16 csum_fold_helper(__u32 csum)
+{
+ return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff;
+}
+
+static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset,
+ unsigned long hdr_len)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ struct udphdr *udph = NULL;
+ __u32 buff_len;
+
+ udph = update_pkt(ctx, 0 - offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ /* Make sure we have enough data to avoid eating the header */
+ if (buff_len - offset < hdr_len)
+ return -1;
+
+ buff_pos = buff_len - offset;
+ if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ __u32 buff_pos, udp_csum = 0;
+ __u32 buff_len, hdr_len, key;
+ struct udphdr *udph;
+ __s32 *val;
+ __u8 tag;
+
+ /* Proceed to update the packet headers before attempting to adjuste
+ * the tail. Once the tail is adjusted we lose access to the offset
+ * amount of data at the end of the packet which is crucial to update
+ * the checksum.
+ * Since any failure beyond this would abort the packet, we should
+ * not worry about passing a packet up the stack with wrong headers
+ */
+ udph = update_pkt(ctx, offset, &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&tmp_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ buff_len = bpf_xdp_get_buff_len(ctx);
+
+ if (bpf_xdp_adjust_tail(ctx, offset) < 0) {
+ bpf_printk("Failed to adjust tail\n");
+ return -1;
+ }
+
+ if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph = NULL;
+ __s32 *adjust_offset, *val;
+ unsigned long hdr_len;
+ void *offset_ptr;
+ __u32 key;
+ __u8 tag;
+ int ret;
+
+ udph = filter_udphdr(ctx, port);
+ if (!udph)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph - (void *)(long)ctx->data +
+ sizeof(struct udphdr);
+ key = XDP_ADJST_OFFSET;
+ adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!adjust_offset)
+ return XDP_PASS;
+
+ if (*adjust_offset < 0)
+ ret = xdp_adjst_tail_shrnk_data(ctx,
+ (__u16)(0 - *adjust_offset),
+ hdr_len);
+ else
+ ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset));
+ if (ret)
+ goto abort_pkt;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort_pkt:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char tmp_buff[MAX_ADJST_OFFSET];
+ struct udphdr *udph;
+ void *offset_ptr;
+ __u32 udp_csum = 0;
+
+ /* Update the length information in the IP and UDP headers before
+ * adjusting the headroom. This simplifies accessing the relevant
+ * fields in the IP and UDP headers for fragmented packets. Any
+ * failure beyond this point will result in the packet being aborted,
+ * so we don't need to worry about incorrect length information for
+ * passed packets.
+ */
+ udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum);
+
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, offset) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len,
+ __u32 offset)
+{
+ char hdr_buff[MAX_HDR_LEN];
+ char data_buff[MAX_ADJST_OFFSET];
+ void *offset_ptr;
+ __s32 *val;
+ __u32 key;
+ __u8 tag;
+ __u32 udp_csum = 0;
+ struct udphdr *udph;
+
+ udph = update_pkt(ctx, (__s16)(offset), &udp_csum);
+ if (!udph)
+ return -1;
+
+ key = XDP_ADJST_TAG;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return -1;
+
+ tag = (__u8)(*val);
+ for (int i = 0; i < MAX_ADJST_OFFSET; i++)
+ __builtin_memcpy(&data_buff[i], &tag, 1);
+
+ offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET :
+ offset & 0xff;
+ if (offset == 0)
+ return -1;
+
+ udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum);
+ udph->check = (__u16)csum_fold_helper(udp_csum);
+
+ if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0)
+ return -1;
+
+ /* Added here to handle clang complain about negative value */
+ hdr_len = hdr_len & 0xff;
+
+ if (hdr_len == 0)
+ return -1;
+
+ if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (offset > MAX_ADJST_OFFSET)
+ return -1;
+
+ if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0)
+ return -1;
+
+ if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0)
+ return -1;
+
+ return 0;
+}
+
+static int xdp_head_adjst(struct xdp_md *ctx, __u16 port)
+{
+ struct udphdr *udph_ptr = NULL;
+ __u32 key, size, hdr_len;
+ __s32 *val;
+ int res;
+
+ /* Filter packets based on UDP port */
+ udph_ptr = filter_udphdr(ctx, port);
+ if (!udph_ptr)
+ return XDP_PASS;
+
+ hdr_len = (void *)udph_ptr - (void *)(long)ctx->data +
+ sizeof(struct udphdr);
+
+ key = XDP_ADJST_OFFSET;
+ val = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!val)
+ return XDP_PASS;
+
+ switch (*val) {
+ case -16:
+ case 16:
+ size = 16;
+ break;
+ case -32:
+ case 32:
+ size = 32;
+ break;
+ case -64:
+ case 64:
+ size = 64;
+ break;
+ case -128:
+ case 128:
+ size = 128;
+ break;
+ case -256:
+ case 256:
+ size = 256;
+ break;
+ default:
+ bpf_printk("Invalid adjustment offset: %d\n", *val);
+ goto abort;
+ }
+
+ if (*val < 0)
+ res = xdp_adjst_head_grow_data(ctx, hdr_len, size);
+ else
+ res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size);
+
+ if (res)
+ goto abort;
+
+ record_stats(ctx, STATS_PASS);
+ return XDP_PASS;
+
+abort:
+ record_stats(ctx, STATS_ABORT);
+ return XDP_ABORTED;
+}
+
+static int xdp_prog_common(struct xdp_md *ctx)
+{
+ __u32 key, *port;
+ __s32 *mode;
+
+ key = XDP_MODE;
+ mode = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!mode)
+ return XDP_PASS;
+
+ key = XDP_PORT;
+ port = bpf_map_lookup_elem(&map_xdp_setup, &key);
+ if (!port)
+ return XDP_PASS;
+
+ switch (*mode) {
+ case XDP_MODE_PASS:
+ return xdp_mode_pass(ctx, (__u16)(*port));
+ case XDP_MODE_DROP:
+ return xdp_mode_drop_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TX:
+ return xdp_mode_tx_handler(ctx, (__u16)(*port));
+ case XDP_MODE_TAIL_ADJST:
+ return xdp_adjst_tail(ctx, (__u16)(*port));
+ case XDP_MODE_HEAD_ADJST:
+ return xdp_head_adjst(ctx, (__u16)(*port));
+ }
+
+ /* Default action is to simple pass */
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_prog(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+SEC("xdp.frags")
+int xdp_prog_frags(struct xdp_md *ctx)
+{
+ return xdp_prog_common(ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py
new file mode 100755
index 000000000000..aab043c59d69
--- /dev/null
+++ b/tools/testing/selftests/net/link_netns.py
@@ -0,0 +1,141 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+
+from lib.py import ksft_run, ksft_exit, ksft_true
+from lib.py import ip
+from lib.py import NetNS, NetNSEnter
+from lib.py import RtnlFamily
+
+
+LINK_NETNSID = 100
+
+
+def test_event() -> None:
+ with NetNS() as ns1, NetNS() as ns2:
+ with NetNSEnter(str(ns2)):
+ rtnl = RtnlFamily()
+
+ rtnl.ntf_subscribe("rtnlgrp-link")
+
+ ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1))
+ ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy")
+ ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2))
+
+ ip("link del dummy1", ns=str(ns1))
+ ip("link del dummy2", ns=str(ns1))
+
+ time.sleep(1)
+ rtnl.check_ntf()
+ ksft_true(rtnl.async_msg_queue.empty(),
+ "Received unexpected link notification")
+
+
+def validate_link_netns(netns, ifname, link_netnsid) -> bool:
+ link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True)
+ if not link_info:
+ return False
+ return link_info[0].get("link_netnsid") == link_netnsid
+
+
+def test_link_net() -> None:
+ configs = [
+ # type, common args, type args, fallback to dev_net
+ ("ipvlan", "link dummy1", "", False),
+ ("macsec", "link dummy1", "", False),
+ ("macvlan", "link dummy1", "", False),
+ ("macvtap", "link dummy1", "", False),
+ ("vlan", "link dummy1", "id 100", False),
+ ("gre", "", "local 192.0.2.1", True),
+ ("vti", "", "local 192.0.2.1", True),
+ ("ipip", "", "local 192.0.2.1", True),
+ ("ip6gre", "", "local 2001:db8::1", True),
+ ("ip6tnl", "", "local 2001:db8::1", True),
+ ("vti6", "", "local 2001:db8::1", True),
+ ("sit", "", "local 192.0.2.1", True),
+ ("xfrm", "", "if_id 1", True),
+ ]
+
+ with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3:
+ net1, net2, net3 = str(ns1), str(ns2), str(ns3)
+
+ # prepare link netnsid and a dummy link needed by certain drivers
+ ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2))
+ ip("link add dummy1 type dummy", ns=net3)
+
+ cases = [
+ # source, "netns", "link-netns", expected link-netns
+ (net3, None, None, None, None),
+ (net3, net2, None, None, LINK_NETNSID),
+ (net2, None, net3, LINK_NETNSID, LINK_NETNSID),
+ (net1, net2, net3, LINK_NETNSID, LINK_NETNSID),
+ ]
+
+ for src_net, netns, link_netns, exp1, exp2 in cases:
+ tgt_net = netns or src_net
+ for typ, cargs, targs, fb_dev_net in configs:
+ cmd = "link add"
+ if netns:
+ cmd += f" netns {netns}"
+ if link_netns:
+ cmd += f" link-netns {link_netns}"
+ cmd += f" {cargs} foo type {typ} {targs}"
+ ip(cmd, ns=src_net)
+ if fb_dev_net:
+ ksft_true(validate_link_netns(tgt_net, "foo", exp1),
+ f"{typ} link_netns validation failed")
+ else:
+ ksft_true(validate_link_netns(tgt_net, "foo", exp2),
+ f"{typ} link_netns validation failed")
+ ip(f"link del foo", ns=tgt_net)
+
+
+def test_peer_net() -> None:
+ types = [
+ "vxcan",
+ "netkit",
+ "veth",
+ ]
+
+ with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4:
+ net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), str(ns4)
+
+ ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2))
+
+ cases = [
+ # source, "netns", "link-netns", "peer netns", expected
+ (net1, None, None, None, None),
+ (net1, net2, None, None, None),
+ (net2, None, net3, None, LINK_NETNSID),
+ (net1, net2, net3, None, None),
+ (net2, None, None, net3, LINK_NETNSID),
+ (net1, net2, None, net3, LINK_NETNSID),
+ (net2, None, net2, net3, LINK_NETNSID),
+ (net1, net2, net4, net3, LINK_NETNSID),
+ ]
+
+ for src_net, netns, link_netns, peer_netns, exp in cases:
+ tgt_net = netns or src_net
+ for typ in types:
+ cmd = "link add"
+ if netns:
+ cmd += f" netns {netns}"
+ if link_netns:
+ cmd += f" link-netns {link_netns}"
+ cmd += f" foo type {typ}"
+ if peer_netns:
+ cmd += f" peer netns {peer_netns}"
+ ip(cmd, ns=src_net)
+ ksft_true(validate_link_netns(tgt_net, "foo", exp),
+ f"{typ} peer_netns validation failed")
+ ip(f"link del foo", ns=tgt_net)
+
+
+def main() -> None:
+ ksft_run([test_event, test_link_net, test_peer_net])
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
new file mode 100755
index 000000000000..881eb399798f
--- /dev/null
+++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh
@@ -0,0 +1,246 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# WARNING
+# -------
+# This is just a dummy script that triggers encap cases with possible dst cache
+# reference loops in affected lwt users (see list below). Some cases are
+# pathological configurations for simplicity, others are valid. Overall, we
+# don't want this issue to happen, no matter what. In order to catch any
+# reference loops, kmemleak MUST be used. The results alone are always blindly
+# successful, don't rely on them. Note that the following tests may crash the
+# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is
+# not present.
+#
+# Affected lwt users so far (please update accordingly if needed):
+# - ila_lwt (output only)
+# - ioam6_iptunnel (output only)
+# - rpl_iptunnel (both input and output)
+# - seg6_iptunnel (both input and output)
+
+source lib.sh
+
+check_compatibility()
+{
+ setup_ns tmp_node &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Cannot create netns."
+ exit $ksft_skip
+ fi
+
+ ip link add name veth0 netns $tmp_node type veth \
+ peer name veth1 netns $tmp_node &>/dev/null
+ local ret=$?
+
+ ip -netns $tmp_node link set veth0 up &>/dev/null
+ ret=$((ret + $?))
+
+ ip -netns $tmp_node link set veth1 up &>/dev/null
+ ret=$((ret + $?))
+
+ if [ $ret != 0 ]; then
+ echo "SKIP: Cannot configure links."
+ cleanup_ns $tmp_node
+ exit $ksft_skip
+ fi
+
+ lsmod 2>/dev/null | grep -q "ila"
+ ila_lsmod=$?
+ [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:1::/64 \
+ encap ila 1:2:3:4 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \
+ dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:3::/64 \
+ encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node route add 2001:db8:4::/64 \
+ encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila"
+ skip_ila=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6"
+ skip_ioam6=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl"
+ skip_rpl=$?
+
+ ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6"
+ skip_seg6=$?
+
+ cleanup_ns $tmp_node
+}
+
+setup()
+{
+ setup_ns alpha beta gamma &>/dev/null
+
+ ip link add name veth-alpha netns $alpha type veth \
+ peer name veth-betaL netns $beta &>/dev/null
+
+ ip link add name veth-betaR netns $beta type veth \
+ peer name veth-gamma netns $gamma &>/dev/null
+
+ ip -netns $alpha link set veth-alpha name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaL name veth0 &>/dev/null
+ ip -netns $beta link set veth-betaR name veth1 &>/dev/null
+ ip -netns $gamma link set veth-gamma name veth0 &>/dev/null
+
+ ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null
+ ip -netns $alpha link set veth0 up &>/dev/null
+ ip -netns $alpha link set lo up &>/dev/null
+ ip -netns $alpha route add 2001:db8:2::/64 \
+ via 2001:db8:1::1 dev veth0 &>/dev/null
+
+ ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null
+ ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null
+ ip -netns $beta link set veth0 up &>/dev/null
+ ip -netns $beta link set veth1 up &>/dev/null
+ ip -netns $beta link set lo up &>/dev/null
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ ip netns exec $beta \
+ sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null
+
+ ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null
+ ip -netns $gamma link set veth0 up &>/dev/null
+ ip -netns $gamma link set lo up &>/dev/null
+ ip -netns $gamma route add 2001:db8:1::/64 \
+ via 2001:db8:2::1 dev veth0 &>/dev/null
+
+ sleep 1
+
+ ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null
+ if [ $? != 0 ]; then
+ echo "SKIP: Setup failed."
+ exit $ksft_skip
+ fi
+
+ sleep 1
+}
+
+cleanup()
+{
+ cleanup_ns $alpha $beta $gamma
+ [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null
+}
+
+run_ila()
+{
+ if [ $skip_ila != 0 ]; then
+ echo "SKIP: ila (output)"
+ return
+ fi
+
+ ip -netns $beta route del 2001:db8:2::/64
+ ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \
+ encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \
+ hook-type output \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ila (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128
+ ip -netns $beta route add 2001:db8:2::/64 dev veth1
+ sleep 1
+}
+
+run_ioam6()
+{
+ if [ $skip_ioam6 != 0 ]; then
+ echo "SKIP: ioam6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: ioam6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_rpl()
+{
+ if [ $skip_rpl != 0 ]; then
+ echo "SKIP: rpl (input)"
+ echo "SKIP: rpl (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap rpl segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: rpl (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run_seg6()
+{
+ if [ $skip_seg6 != 0 ]; then
+ echo "SKIP: seg6 (input)"
+ echo "SKIP: seg6 (output)"
+ return
+ fi
+
+ ip -netns $beta route change 2001:db8:2::/64 \
+ encap seg6 mode inline segs 2001:db8:2::2 \
+ dev veth1 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (input)"
+ ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+
+ echo "TEST: seg6 (output)"
+ ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null
+ sleep 1
+}
+
+run()
+{
+ run_ila
+ run_ioam6
+ run_rpl
+ run_seg6
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges."
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool."
+ exit $ksft_skip
+fi
+
+check_compatibility
+
+trap cleanup EXIT
+
+setup
+run
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 49daae73c41e..833279fb34e2 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,5 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
mptcp_connect
+mptcp_diag
mptcp_inq
mptcp_sockopt
pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 8e3fc05a5397..15d144a25d82 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -2,16 +2,35 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
-
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
-
-TEST_FILES := mptcp_lib.sh settings
-
-TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) ../net_helper.sh
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := \
+ diag.sh \
+ mptcp_connect.sh \
+ mptcp_connect_checksum.sh \
+ mptcp_connect_mmap.sh \
+ mptcp_connect_sendfile.sh \
+ mptcp_join.sh \
+ mptcp_sockopt.sh \
+ pm_netlink.sh \
+ simult_flows.sh \
+ userspace_pm.sh \
+# end of TEST_PROGS
+
+TEST_GEN_FILES := \
+ mptcp_connect \
+ mptcp_diag \
+ mptcp_inq \
+ mptcp_sockopt \
+ pm_nl_ctl \
+# end of TEST_GEN_FILES
+
+TEST_FILES := \
+ mptcp_lib.sh \
+ settings \
+# end of TEST_FILES
+
+TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh)
EXTRA_CLEAN := *.pcap
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 4f80014cae49..59051ee2a986 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,34 +1,36 @@
-CONFIG_KALLSYMS=y
-CONFIG_MPTCP=y
-CONFIG_IPV6=y
-CONFIG_MPTCP_IPV6=y
CONFIG_INET_DIAG=m
CONFIG_INET_MPTCP_DIAG=m
-CONFIG_VETH=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_SYN_COOKIES=y
-CONFIG_NETFILTER=y
-CONFIG_NETFILTER_ADVANCED=y
-CONFIG_NETFILTER_NETLINK=m
-CONFIG_NF_TABLES=m
-CONFIG_NFT_COMPAT=m
-CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_MATCH_BPF=m
-CONFIG_NETFILTER_XT_MATCH_LENGTH=m
-CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
-CONFIG_NETFILTER_XT_TARGET_MARK=m
-CONFIG_NF_TABLES_INET=y
-CONFIG_NFT_TPROXY=m
-CONFIG_NFT_SOCKET=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6=y
CONFIG_IPV6_MULTIPLE_TABLES=y
-CONFIG_IP6_NF_FILTER=m
+CONFIG_KALLSYMS=y
+CONFIG_MPTCP=y
+CONFIG_MPTCP_IPV6=y
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_FW=m
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_SOCKET=m
+CONFIG_NFT_TPROXY=m
+CONFIG_SYN_COOKIES=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 2bd0c1eb70c5..d847ff1737c3 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -28,7 +28,7 @@ flush_pids()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
@@ -200,6 +200,62 @@ chk_msk_cestab()
"${expected}" "${msg}" ""
}
+chk_dump_one()
+{
+ local ss_token
+ local token
+ local msg
+
+ ss_token="$(ss -inmHMN $ns |
+ mptcp_lib_get_info_value "token" "token")"
+
+ token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
+ awk -F':[ \t]+' '/^token/ {print $2}')"
+
+ msg="....chk dump_one"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
+chk_dump_subflow()
+{
+ local inet_diag_token
+ local subflow_line
+ local ss_output
+ local ss_token
+ local msg
+
+ ss_output=$(ss -tniN $ns)
+
+ subflow_line=$(echo "$ss_output" | \
+ grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+')
+
+ ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+')
+
+ inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+ grep -Eo 'token:[^ ]+')
+
+ msg="....chk dump_subflow"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
msk_info_get_value()
{
local port="${1}"
@@ -290,6 +346,8 @@ chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
chk_msk_cestab 2
+chk_dump_one
+chk_dump_subflow
flush_pids
chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 4209b9569039..404a77bf366a 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -25,6 +25,8 @@
#include <sys/types.h>
#include <sys/mman.h>
+#include <arpa/inet.h>
+
#include <netdb.h>
#include <netinet/in.h>
@@ -178,13 +180,27 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ /* glibc starts to support MPTCP since v2.42.
+ * For older versions, use IPPROTO_TCP to resolve,
+ * and use TCP/MPTCP to create socket.
+ * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+ */
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -290,7 +306,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -354,7 +370,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
int infd, struct wstate *winfo)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
@@ -694,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
if (bw < 0) {
- if (cfg_rcv_trunc)
- return 0;
+ /* expected reset, continue to read */
+ if (cfg_rcv_trunc &&
+ (errno == ECONNRESET ||
+ errno == EPIPE)) {
+ fds.events &= ~POLLOUT;
+ continue;
+ }
+
perror("write");
return 111;
}
@@ -721,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd,
}
if (fds.revents & (POLLERR | POLLNVAL)) {
- if (cfg_rcv_trunc)
- return 0;
+ if (cfg_rcv_trunc) {
+ fds.events &= ~(POLLERR | POLLNVAL);
+ continue;
+ }
fprintf(stderr, "Unexpected revents: "
"POLLERR/POLLNVAL(%x)\n", fds.revents);
return 5;
@@ -1048,6 +1072,8 @@ static void check_getpeername_connect(int fd)
socklen_t salen = sizeof(ss);
char a[INET6_ADDRSTRLEN];
char b[INET6_ADDRSTRLEN];
+ const char *iface;
+ size_t len;
if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) {
perror("getpeername");
@@ -1057,7 +1083,13 @@ static void check_getpeername_connect(int fd)
xgetnameinfo((struct sockaddr *)&ss, salen,
a, sizeof(a), b, sizeof(b));
- if (strcmp(cfg_host, a) || strcmp(cfg_port, b))
+ iface = strchr(cfg_host, '%');
+ if (iface)
+ len = iface - cfg_host;
+ else
+ len = strlen(cfg_host) + 1;
+
+ if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b))
fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__,
cfg_host, a, cfg_port, b);
}
@@ -1077,6 +1109,7 @@ int main_loop_s(int listensock)
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int err = 0;
int fd = 0;
again:
@@ -1109,7 +1142,7 @@ again:
SOCK_TEST_TCPULP(remotesock, 0);
memset(&winfo, 0, sizeof(winfo));
- copyfd_io(fd, remotesock, 1, true, &winfo);
+ err = copyfd_io(fd, remotesock, 1, true, &winfo);
} else {
perror("accept");
return 1;
@@ -1118,10 +1151,10 @@ again:
if (cfg_input)
close(fd);
- if (--cfg_repeat > 0)
+ if (!err && --cfg_repeat > 0)
goto again;
- return 0;
+ return err;
}
static void init_rng(void)
@@ -1211,23 +1244,42 @@ static void parse_setsock_options(const char *name)
exit(1);
}
-void xdisconnect(int fd, int addrlen)
+void xdisconnect(int fd)
{
- struct sockaddr_storage empty;
+ socklen_t addrlen = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage addr, empty;
int msec_sleep = 10;
- int queued = 1;
- int i;
+ void *raw_addr;
+ int i, cmdlen;
+ char cmd[128];
+
+ /* get the local address and convert it to string */
+ if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0)
+ xerror("getsockname");
+
+ if (addr.ss_family == AF_INET)
+ raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr);
+ else if (addr.ss_family == AF_INET6)
+ raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr);
+ else
+ xerror("bad family");
+
+ strcpy(cmd, "ss -Mnt | grep -q ");
+ cmdlen = strlen(cmd);
+ if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
+ sizeof(cmd) - cmdlen))
+ xerror("inet_ntop");
shutdown(fd, SHUT_WR);
- /* while until the pending data is completely flushed, the later
+ /*
+ * wait until the pending data is completely flushed and all
+ * the sockets reached the closed status.
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
- if (ioctl(fd, SIOCOUTQ, &queued) < 0)
- xerror("can't query out socket queue: %d", errno);
-
- if (!queued)
+ /* closed socket are not listed by 'ss' */
+ if (system(cmd) != 0)
break;
if (i > poll_timeout)
@@ -1249,7 +1301,7 @@ int main_loop(void)
if (cfg_input && cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
- if (fd < 0)
+ if (fd_in < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
@@ -1272,18 +1324,18 @@ again:
if (cfg_input && !cfg_sockopt_types.mptfo) {
fd_in = open(cfg_input, O_RDONLY);
- if (fd < 0)
+ if (fd_in < 0)
xerror("can't open %s:%d", cfg_input, errno);
}
ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
if (ret)
- return ret;
+ goto out;
if (cfg_truncate > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ shutdown(fd, SHUT_WR);
} else if (--cfg_repeat > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ xdisconnect(fd);
/* the socket could be unblocking at this point, we need the
* connect to be blocking
@@ -1299,7 +1351,10 @@ again:
close(fd);
}
- return 0;
+out:
+ if (cfg_input)
+ close(fd_in);
+ return ret;
}
int parse_proto(const char *proto)
@@ -1394,7 +1449,7 @@ static void parse_opts(int argc, char **argv)
*/
if (cfg_truncate < 0) {
cfg_rcv_trunc = true;
- signal(SIGPIPE, handle_signal);
+ signal(SIGPIPE, SIG_IGN);
}
break;
case 'j':
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b48b4e56826a..a6447f7a31fe 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -134,10 +134,10 @@ ns4=""
TEST_GROUP=""
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
- rm -f "$cin_disconnect" "$cout_disconnect"
+ rm -f "$cin_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
@@ -155,7 +155,6 @@ cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
cin_disconnect="$cin".disconnect
-cout_disconnect="$cout".disconnect
trap cleanup EXIT
mptcp_lib_ns_init ns1 ns2 ns3 ns4
@@ -212,6 +211,11 @@ if $checksum; then
done
fi
+if $capture; then
+ rndh="${ns1:4}"
+ mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-"
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
@@ -362,7 +366,6 @@ do_transfer()
if $capture; then
local capuser
- local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
@@ -372,85 +375,75 @@ do_transfer()
local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
local capopt="-i any -s 65535 -B 32768 ${capuser}"
- ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ ip netns exec ${listener_ns} tcpdump ${capopt} \
+ -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
local cappid_listener=$!
- ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
- local cappid_connector=$!
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ ip netns exec ${connector_ns} tcpdump ${capopt} \
+ -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+ fi
sleep 1
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
+ mptcp_lib_nstat_init "${listener_ns}"
if [ ${listener_ns} != ${connector_ns} ]; then
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
- fi
-
- local stat_synrx_last_l
- local stat_ackrx_last_l
- local stat_cookietx_last
- local stat_cookierx_last
- local stat_csum_err_s
- local stat_csum_err_c
- local stat_tcpfb_last_l
- stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
- stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_args $local_addr < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${connector_ns}"
+ fi
+
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_args $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_args $connect_addr < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
local stop
stop=$(date +%s%3N)
if $capture; then
sleep 1
kill ${cappid_listener}
- kill ${cappid_connector}
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ kill ${cappid_connector}
+ fi
fi
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
+ mptcp_lib_nstat_get "${listener_ns}"
if [ ${listener_ns} != ${connector_ns} ]; then
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ mptcp_lib_nstat_get "${connector_ns}"
fi
local duration
duration=$((stop-start))
printf "(duration %05sms) " "${duration}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
- cat /tmp/${listener_ns}.out
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
- [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
echo
cat "$capout"
@@ -464,38 +457,38 @@ do_transfer()
rets=$?
local extra=""
- local stat_synrx_now_l
- local stat_ackrx_now_l
- local stat_cookietx_now
- local stat_cookierx_now
- local stat_ooo_now
- local stat_tcpfb_now_l
- stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
- stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
- stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
-
- expect_synrx=$((stat_synrx_last_l))
- expect_ackrx=$((stat_ackrx_last_l))
+ local stat_synrx
+ local stat_ackrx
+ local stat_cookietx
+ local stat_cookierx
+ local stat_ooo
+ local stat_tcpfb
+ stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
+
+ expect_synrx=0
+ expect_ackrx=0
cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+ expect_synrx=${connect_per_transfer}
+ expect_ackrx=${connect_per_transfer}
fi
- if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
- mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ if [ ${stat_synrx} -lt ${expect_synrx} ]; then
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \
"than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
- if [ ${stat_ooo_now} -eq 0 ]; then
- mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then
+ if [ ${stat_ooo} -eq 0 ]; then
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \
"than expected (${expect_ackrx})"
rets=1
else
@@ -509,47 +502,45 @@ do_transfer()
csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
- local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
- if [ $csum_err_s_nr -gt 0 ]; then
- mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+ if [ $csum_err_s -gt 0 ]; then
+ mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]"
rets=1
fi
- local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
- if [ $csum_err_c_nr -gt 0 ]; then
- mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+ if [ $csum_err_c -gt 0 ]; then
+ mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]"
retc=1
fi
fi
- if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then
mptcp_lib_pr_fail "unexpected fallback to TCP"
rets=1
fi
if [ $cookies -eq 2 ];then
- if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ if [ $stat_cookietx -eq 0 ] ;then
extra+=" WARN: CookieSent: did not advance"
fi
- if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+ if [ $stat_cookierx -eq 0 ] ;then
extra+=" WARN: CookieRecv: did not advance"
fi
else
- if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+ if [ $stat_cookietx -gt 0 ] ;then
extra+=" WARN: CookieSent: changed"
fi
- if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+ if [ $stat_cookierx -gt 0 ] ;then
extra+=" WARN: CookieRecv: changed"
fi
fi
- if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+ if [ ${stat_synrx} -gt ${expect_synrx} ]; then
extra+=" WARN: SYNRX: expect ${expect_synrx},"
- extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
+ extra+=" got ${stat_synrx} (probably retransmissions)"
fi
- if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+ if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then
extra+=" WARN: ACKRX: expect ${expect_ackrx},"
- extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ extra+=" got ${stat_ackrx} (probably retransmissions)"
fi
if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
@@ -587,7 +578,7 @@ make_file()
mptcp_lib_make_file $name 1024 $ksize
dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
- echo "Created $name (size $(du -b "$name")) containing data sent by $who"
+ echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who"
}
run_tests_lo()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
new file mode 100755
index 000000000000..ce93ec2f107f
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -C "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
new file mode 100755
index 000000000000..5dd30f9394af
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
new file mode 100755
index 000000000000..1d16fb1cc9bb
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \
+ "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
new file mode 100644
index 000000000000..e084796e804d
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025, Kylin Software */
+
+#include <linux/sock_diag.h>
+#include <linux/rtnetlink.h>
+#include <linux/inet_diag.h>
+#include <linux/netlink.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <linux/tcp.h>
+#include <arpa/inet.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <stdio.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+#define parse_rtattr_nested(tb, max, rta) \
+ (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+ NLA_F_NESTED))
+
+struct params {
+ __u32 target_token;
+ char subflow_addrs[1024];
+};
+
+struct mptcp_info {
+ __u8 mptcpi_subflows;
+ __u8 mptcpi_add_addr_signal;
+ __u8 mptcpi_add_addr_accepted;
+ __u8 mptcpi_subflows_max;
+ __u8 mptcpi_add_addr_signal_max;
+ __u8 mptcpi_add_addr_accepted_max;
+ __u32 mptcpi_flags;
+ __u32 mptcpi_token;
+ __u64 mptcpi_write_seq;
+ __u64 mptcpi_snd_una;
+ __u64 mptcpi_rcv_nxt;
+ __u8 mptcpi_local_addr_used;
+ __u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
+ __u8 mptcpi_subflows_total;
+ __u8 reserved[3];
+ __u32 mptcpi_last_data_sent;
+ __u32 mptcpi_last_data_recv;
+ __u32 mptcpi_last_ack_recv;
+};
+
+enum {
+ MPTCP_SUBFLOW_ATTR_UNSPEC,
+ MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+ MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+ MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+ MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+ MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+ MPTCP_SUBFLOW_ATTR_FLAGS,
+ MPTCP_SUBFLOW_ATTR_ID_REM,
+ MPTCP_SUBFLOW_ATTR_ID_LOC,
+ MPTCP_SUBFLOW_ATTR_PAD,
+
+ __MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8)
+
+#define rta_getattr(type, value) (*(type *)RTA_DATA(value))
+
+static void die_perror(const char *msg)
+{
+ perror(msg);
+ exit(1);
+}
+
+static void die_usage(int r)
+{
+ fprintf(stderr, "Usage:\n"
+ "mptcp_diag -t <token>\n"
+ "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
+ exit(r);
+}
+
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
+{
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct {
+ struct nlmsghdr nlh;
+ struct inet_diag_req_v2 r;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST
+ },
+ .r = *r
+ };
+ struct rtattr rta_proto;
+ struct iovec iov[6];
+ int iovlen = 0;
+
+ iov[iovlen++] = (struct iovec) {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+
+ if (proto == IPPROTO_MPTCP) {
+ rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+ rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+ iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+ iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+ req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+ }
+
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen
+ };
+
+ for (;;) {
+ if (sendmsg(fd, &msg, 0) < 0) {
+ if (errno == EINTR)
+ continue;
+ die_perror("sendmsg");
+ }
+ break;
+ }
+}
+
+static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta,
+ int len, unsigned short flags)
+{
+ unsigned short type;
+
+ memset(tb, 0, sizeof(struct rtattr *) * (max + 1));
+ while (RTA_OK(rta, len)) {
+ type = rta->rta_type & ~flags;
+ if (type <= max && !tb[type])
+ tb[type] = rta;
+ rta = RTA_NEXT(rta, len);
+ }
+}
+
+static void print_info_msg(struct mptcp_info *info)
+{
+ printf("Token & Flags\n");
+ printf("token: %x\n", info->mptcpi_token);
+ printf("flags: %x\n", info->mptcpi_flags);
+ printf("csum_enabled: %u\n", info->mptcpi_csum_enabled);
+
+ printf("\nBasic Info\n");
+ printf("subflows: %u\n", info->mptcpi_subflows);
+ printf("subflows_max: %u\n", info->mptcpi_subflows_max);
+ printf("subflows_total: %u\n", info->mptcpi_subflows_total);
+ printf("local_addr_used: %u\n", info->mptcpi_local_addr_used);
+ printf("local_addr_max: %u\n", info->mptcpi_local_addr_max);
+ printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal);
+ printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted);
+ printf("add_addr_signal_max: %u\n", info->mptcpi_add_addr_signal_max);
+ printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max);
+
+ printf("\nTransmission Info\n");
+ printf("write_seq: %llu\n", info->mptcpi_write_seq);
+ printf("snd_una: %llu\n", info->mptcpi_snd_una);
+ printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt);
+ printf("last_data_sent: %u\n", info->mptcpi_last_data_sent);
+ printf("last_data_recv: %u\n", info->mptcpi_last_data_recv);
+ printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv);
+ printf("retransmits: %u\n", info->mptcpi_retransmits);
+ printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans);
+ printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent);
+ printf("bytes_received: %llu\n", info->mptcpi_bytes_received);
+ printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked);
+}
+
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+ u_int32_t flags = 0;
+
+ printf("It's a mptcp subflow, the subflow info:\n");
+ if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+ char caps[32 + 1] = { 0 }, *cap = &caps[0];
+
+ flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
+ *cap++ = 'M';
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
+ *cap++ = 'm';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
+ *cap++ = 'J';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
+ *cap++ = 'j';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
+ *cap++ = 'B';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
+ *cap++ = 'b';
+ if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
+ *cap++ = 'e';
+ if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
+ *cap++ = 'c';
+ if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
+ *cap++ = 'v';
+
+ if (flags)
+ printf(" flags:%s", caps);
+ }
+ if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+ printf(" token:%04x(id:%u)/%04x(id:%u)",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+ printf(" seq:%llu",
+ rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+ printf(" sfseq:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+ printf(" ssnoff:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+ printf(" maplen:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+ printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
+{
+ struct inet_diag_msg *r = NLMSG_DATA(nlh);
+ struct rtattr *tb[INET_DIAG_MAX + 1];
+
+ parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1),
+ nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
+ NLA_F_NESTED);
+
+ if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
+ int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
+ struct mptcp_info *info;
+
+ /* workaround fort older kernels with less fields */
+ if (len < sizeof(*info)) {
+ info = alloca(sizeof(*info));
+ memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len);
+ memset((char *)info + len, 0, sizeof(*info) - len);
+ } else {
+ info = RTA_DATA(tb[INET_DIAG_INFO]);
+ }
+ print_info_msg(info);
+ }
+ if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+ struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+ tb[INET_DIAG_ULP_INFO]);
+
+ if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+ struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+ ulpinfo[INET_ULP_INFO_MPTCP]);
+ print_subflow_info(sfinfo);
+ } else {
+ printf("It's a normal TCP!\n");
+ }
+ }
+}
+
+static void recv_nlmsg(int fd, __u32 proto)
+{
+ char rcv_buff[8192];
+ struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
+ struct sockaddr_nl rcv_nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec rcv_iov = {
+ .iov_base = rcv_buff,
+ .iov_len = sizeof(rcv_buff)
+ };
+ struct msghdr rcv_msg = {
+ .msg_name = &rcv_nladdr,
+ .msg_namelen = sizeof(rcv_nladdr),
+ .msg_iov = &rcv_iov,
+ .msg_iovlen = 1
+ };
+ int len;
+
+ len = recvmsg(fd, &rcv_msg, 0);
+
+ while (NLMSG_OK(nlh, len)) {
+ if (nlh->nlmsg_type == NLMSG_DONE) {
+ printf("NLMSG_DONE\n");
+ break;
+ } else if (nlh->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err;
+
+ err = (struct nlmsgerr *)NLMSG_DATA(nlh);
+ printf("Error %d:%s\n",
+ -(err->error), strerror(-(err->error)));
+ break;
+ }
+ parse_nlmsg(nlh, proto);
+ nlh = NLMSG_NEXT(nlh, len);
+ }
+}
+
+static void get_mptcpinfo(__u32 token)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = token,
+ };
+ __u32 proto = IPPROTO_MPTCP;
+ int fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ send_query(fd, &r, proto);
+ recv_nlmsg(fd, proto);
+
+ close(fd);
+}
+
+static void get_subflow_info(char *subflow_addrs)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE,
+ .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE,
+ };
+ char saddr[64], daddr[64];
+ int sport, dport;
+ int ret;
+ int fd;
+
+ ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport);
+ if (ret != 4)
+ die_perror("IP PORT Pairs has style problems!");
+
+ printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ r.id.idiag_sport = htons(sport);
+ r.id.idiag_dport = htons(dport);
+
+ inet_pton(AF_INET, saddr, &r.id.idiag_src);
+ inet_pton(AF_INET, daddr, &r.id.idiag_dst);
+ send_query(fd, &r, IPPROTO_TCP);
+ recv_nlmsg(fd, IPPROTO_TCP);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
+{
+ int c;
+
+ if (argc < 2)
+ die_usage(1);
+
+ while ((c = getopt(argc, argv, "ht:s:")) != -1) {
+ switch (c) {
+ case 'h':
+ die_usage(0);
+ break;
+ case 't':
+ sscanf(optarg, "%x", &p->target_token);
+ break;
+ case 's':
+ strncpy(p->subflow_addrs, optarg,
+ sizeof(p->subflow_addrs) - 1);
+ break;
+ default:
+ die_usage(1);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct params p = { 0 };
+
+ parse_opts(argc, argv, &p);
+
+ if (p.target_token)
+ get_mptcpinfo(p.target_token);
+
+ if (p.subflow_addrs[0] != '\0')
+ get_subflow_info(p.subflow_addrs);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..8e8f6441ad8b 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -72,13 +72,22 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -91,7 +100,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -136,7 +145,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
@@ -493,6 +502,7 @@ static int server(int unixfd)
process_one_client(r, unixfd);
+ close(fd);
return 0;
}
@@ -571,8 +581,12 @@ int main(int argc, char *argv[])
die_perror("pipe");
s = xfork();
- if (s == 0)
- return server(unixfds[1]);
+ if (s == 0) {
+ close(unixfds[0]);
+ ret = server(unixfds[1]);
+ close(unixfds[1]);
+ return ret;
+ }
close(unixfds[1]);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c07e2bd3a315..b2e6e548f796 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,7 +8,7 @@
# ShellCheck incorrectly believes that most of the code here is unreachable
# because it's invoked by variable name, see how the "tests" array is used
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
. "$(dirname "${0}")/mptcp_lib.sh"
@@ -62,6 +62,8 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset bind_addr
+unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
unset join_fail_nr
@@ -73,6 +75,17 @@ unset join_create_err
unset join_bind_err
unset join_connect_err
+unset fb_ns1
+unset fb_ns2
+unset fb_infinite_map_tx
+unset fb_dss_corruption
+unset fb_simult_conn
+unset fb_mpc_passive
+unset fb_mpc_active
+unset fb_mpc_data
+unset fb_md5_sig
+unset fb_dss
+
# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
@@ -346,6 +359,7 @@ reset_with_add_addr_timeout()
tables="${ip6tables}"
fi
+ # set a maximum, to avoid too long timeout with exponential backoff
ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
@@ -632,6 +646,27 @@ wait_mpj()
done
}
+wait_ll_ready()
+{
+ local ns="${1}"
+
+ local i
+ for i in $(seq 50); do
+ ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" |
+ grep -qw "tentative" || break
+ sleep 0.1
+ done
+}
+
+get_ll_addr()
+{
+ local ns="${1}"
+ local iface="${2}"
+
+ ip -n "${ns}" -6 addr show dev "${iface}" scope link |
+ grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#'
+}
+
kill_events_pids()
{
mptcp_lib_kill_wait $evts_ns1_pid
@@ -938,6 +973,9 @@ do_transfer()
local FAILING_LINKS=${FAILING_LINKS:-""}
local fastclose=${fastclose:-""}
local speed=${speed:-"fast"}
+ local bind_addr=${bind_addr:-"::"}
+ local listener_in="${sin}"
+ local connector_in="${cin}"
port=$(get_port)
:> "$cout"
@@ -945,10 +983,8 @@ do_transfer()
cond_start_capture ${listener_ns}
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat -n
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat -n
+ mptcp_lib_nstat_init "${listener_ns}"
+ mptcp_lib_nstat_init "${connector_ns}"
local extra_args
if [ $speed = "fast" ]; then
@@ -986,42 +1022,40 @@ do_transfer()
extra_srv_args="$extra_args $extra_srv_args"
if [ "$test_linkfail" -gt 1 ];then
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sinfail" > "$sout" &
- else
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
- $extra_srv_args "::" < "$sin" > "$sout" &
+ listener_in="${sinfail}"
fi
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \
+ ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" &
local spid=$!
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
extra_cl_args="$extra_args $extra_cl_args"
if [ "$test_linkfail" -eq 0 ];then
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr < "$cin" > "$cout" &
elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+ connector_in="${cinsent}"
( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
tee "$cinsent" | \
- timeout ${timeout_test} \
ip netns exec ${connector_ns} \
./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
$extra_cl_args $connect_addr > "$cout" &
else
+ connector_in="${cinsent}"
tee "$cinsent" < "$cinfail" | \
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
- $extra_cl_args $connect_addr > "$cout" &
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
fi
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
check_cestab $listener_ns $connector_ns
@@ -1030,36 +1064,26 @@ do_transfer()
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
cond_stop_capture
- NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
- nstat | grep Tcp > /tmp/${listener_ns}.out
- NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
- nstat | grep Tcp > /tmp/${connector_ns}.out
+ mptcp_lib_nstat_get "${listener_ns}"
+ mptcp_lib_nstat_get "${connector_ns}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
fail_test "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
- cat /tmp/${listener_ns}.out
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
- cat /tmp/${connector_ns}.out
-
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
return 1
fi
- if [ "$test_linkfail" -gt 1 ];then
- check_transfer $sinfail $cout "file received by client" $trunc_size
- else
- check_transfer $sin $cout "file received by client" $trunc_size
- fi
+ check_transfer $listener_in $cout "file received by client" $trunc_size
retc=$?
- if [ "$test_linkfail" -eq 0 ];then
- check_transfer $cin $sout "file received by server" $trunc_size
- else
- check_transfer $cinsent $sout "file received by server" $trunc_size
- fi
+ check_transfer $connector_in $sout "file received by server" $trunc_size
rets=$?
[ $retc -eq 0 ] && [ $rets -eq 0 ]
@@ -1128,12 +1152,20 @@ run_tests()
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
}
+_dump_stats()
+{
+ local ns="${1}"
+ local side="${2}"
+
+ mptcp_lib_print_err "${side} ns stats (${ns2})"
+ mptcp_lib_pr_nstat "${ns}"
+ echo
+}
+
dump_stats()
{
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep Tcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep Tcp
+ _dump_stats "${ns1}" "Server"
+ _dump_stats "${ns2}" "Client"
}
chk_csum_nr()
@@ -1403,11 +1435,121 @@ chk_join_tx_nr()
print_results "join Tx" ${rc}
}
+chk_fallback_nr()
+{
+ local infinite_map_tx=${fb_infinite_map_tx:-0}
+ local dss_corruption=${fb_dss_corruption:-0}
+ local simult_conn=${fb_simult_conn:-0}
+ local mpc_passive=${fb_mpc_passive:-0}
+ local mpc_active=${fb_mpc_active:-0}
+ local mpc_data=${fb_mpc_data:-0}
+ local md5_sig=${fb_md5_sig:-0}
+ local dss=${fb_dss:-0}
+ local rc=${KSFT_PASS}
+ local ns=$1
+ local count
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$infinite_map_tx" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns infinite map tx fallback"
+ fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$dss_corruption" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns dss corruption fallback"
+ fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$simult_conn" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns simult conn fallback"
+ fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_passive" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc passive fallback"
+ fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_active" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc active fallback"
+ fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$mpc_data" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns mpc data fallback"
+ fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$md5_sig" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns MD5 Sig fallback"
+ fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig"
+ fi
+
+ count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$dss" ]; then
+ rc=${KSFT_FAIL}
+ print_check "$ns dss fallback"
+ fail_test "got $count dss fallback[s] in $ns expected $dss"
+ fi
+
+ return $rc
+}
+
+chk_fallback_nr_all()
+{
+ local netns=("ns1" "ns2")
+ local fb_ns=("fb_ns1" "fb_ns2")
+ local rc=${KSFT_PASS}
+
+ for i in 0 1; do
+ if [ -n "${!fb_ns[i]}" ]; then
+ eval "${!fb_ns[i]}" \
+ chk_fallback_nr ${netns[i]} || rc=${?}
+ else
+ chk_fallback_nr ${netns[i]} || rc=${?}
+ fi
+ done
+
+ if [ "${rc}" != "${KSFT_PASS}" ]; then
+ print_results "fallback" ${rc}
+ fi
+}
+
chk_join_nr()
{
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
+ local syn_rej=${join_syn_rej:-0}
local csum_ns1=${join_csum_ns1:-0}
local csum_ns2=${join_csum_ns2:-0}
local fail_nr=${join_fail_nr:-0}
@@ -1446,6 +1588,15 @@ chk_join_nr()
fi
fi
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "synack HMAC"
+ fail_test "got $count JOIN[s] synack HMAC failure expected 0"
+ fi
+
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
rc=${KSFT_SKIP}
@@ -1455,11 +1606,31 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack rx expected $ack_nr"
fi
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "0" ]; then
+ rc=${KSFT_FAIL}
+ print_check "ack HMAC"
+ fail_test "got $count JOIN[s] ack HMAC failure expected 0"
+ fi
+
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_rej" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn rejected"
+ fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
chk_join_tx_nr
+ chk_fallback_nr_all
+
if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
@@ -1523,7 +1694,6 @@ chk_add_nr()
local tx=""
local rx=""
local count
- local timeout
if [[ $ns_invert = "invert" ]]; then
ns_tx=$ns2
@@ -1532,15 +1702,13 @@ chk_add_nr()
rx=" server"
fi
- timeout=$(ip netns exec ${ns_tx} sysctl -n net.mptcp.add_addr_timeout)
-
print_check "add addr rx${rx}"
count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
- # if the test configured a short timeout tolerate greater then expected
- # add addrs options, due to retransmissions
- elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
+ # Tolerate more ADD_ADDR then expected (if any), due to retransmissions
+ elif [ "$count" != "$add_nr" ] &&
+ { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then
fail_test "got $count ADD_ADDR[s] expected $add_nr"
else
print_ok
@@ -1628,18 +1796,15 @@ chk_add_tx_nr()
{
local add_tx_nr=$1
local echo_tx_nr=$2
- local timeout
local count
- timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
-
print_check "add addr tx"
count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
- # if the test configured a short timeout tolerate greater then expected
- # add addrs options, due to retransmissions
- elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+ # Tolerate more ADD_ADDR then expected (if any), due to retransmissions
+ elif [ "$count" != "$add_tx_nr" ] &&
+ { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
else
print_ok
@@ -1950,7 +2115,8 @@ subflows_tests()
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# subflow
@@ -1979,7 +2145,8 @@ subflows_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 2 1
+ join_syn_rej=1 \
+ chk_join_nr 2 2 1
fi
# single subflow, dev
@@ -2125,7 +2292,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
chk_add_nr 3 3
fi
@@ -2137,7 +2305,8 @@ signal_address_tests()
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
pm_nl_set_limits $ns2 3 3
- run_tests $ns1 $ns2 10.0.1.1
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
join_syn_tx=3 \
chk_join_nr 1 1 1
chk_add_nr 3 3
@@ -2175,6 +2344,74 @@ signal_address_tests()
fi
}
+laminar_endp_tests()
+{
+ # no laminar endpoints: routing rules are used
+ if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # laminar endpoints: this endpoint is used
+ if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # laminar endpoints: these endpoints are used
+ if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ fi
+
+ # laminar endpoints: only one endpoint is used
+ if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 2 2
+ fi
+
+ # laminar endpoints: subflow and laminar flags
+ if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ pm_nl_set_limits $ns1 0 4
+ pm_nl_set_limits $ns2 2 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+}
+
link_failure_tests()
{
# accept and use add_addr with additional subflows and link loss
@@ -2319,7 +2556,7 @@ remove_tests()
if reset "remove single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns2=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
@@ -2332,8 +2569,8 @@ remove_tests()
if reset "remove multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns2=-2 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
@@ -2344,7 +2581,7 @@ remove_tests()
# single address, remove
if reset "remove single address"; then
pm_nl_set_limits $ns1 0 1
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 1
addr_nr_ns1=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2357,9 +2594,9 @@ remove_tests()
# subflow and signal, remove
if reset "remove subflow and signal"; then
pm_nl_set_limits $ns1 0 2
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 2
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
@@ -2371,10 +2608,10 @@ remove_tests()
# subflows and signal, remove
if reset "remove subflows and signal"; then
pm_nl_set_limits $ns1 0 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 3
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2386,9 +2623,9 @@ remove_tests()
# addresses remove
if reset "remove addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
@@ -2401,10 +2638,10 @@ remove_tests()
# invalid addresses remove
if reset "remove invalid addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
# broadcast IP: no packet for this address will be received on ns1
- pm_nl_add_endpoint $ns1 224.0.0.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
pm_nl_set_limits $ns2 2 2
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
@@ -2418,10 +2655,10 @@ remove_tests()
# subflows and signal, flush
if reset "flush subflows and signal"; then
pm_nl_set_limits $ns1 0 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
pm_nl_set_limits $ns2 1 3
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2434,9 +2671,9 @@ remove_tests()
if reset "flush subflows"; then
pm_nl_set_limits $ns1 3 3
pm_nl_set_limits $ns2 3 3
- pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
- pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
- pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3
@@ -2453,9 +2690,9 @@ remove_tests()
# addresses flush
if reset "flush addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2468,9 +2705,9 @@ remove_tests()
# invalid addresses flush
if reset "flush invalid addresses"; then
pm_nl_set_limits $ns1 3 3
- pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup
pm_nl_set_limits $ns2 3 3
addr_nr_ns1=-8 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2739,7 +2976,11 @@ mixed_tests()
pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
speed=slow \
run_tests $ns1 $ns2 dead:beef:2::1
- chk_join_nr 1 1 1
+ if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then
+ chk_join_nr 0 0 0
+ else
+ chk_join_nr 1 1 1
+ fi
fi
# fullmesh still tries to create all the possibly subflows with
@@ -3020,6 +3261,133 @@ add_addr_ports_tests()
fi
}
+bind_tests()
+{
+ # bind to one address should not allow extra subflows to other addresses
+ if reset "bind main address v4, no join v4"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ bind_addr="10.0.1.1" \
+ run_tests $ns1 $ns2 10.0.1.1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # bind to one address should not allow extra subflows to other addresses
+ if reset "bind main address v6, no join v6"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ bind_addr="dead:beef:1::1" \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ join_syn_tx=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # multiple binds to allow extra subflows to other addresses
+ if reset "multiple bind to allow joins v4"; then
+ local extra_bind
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+
+ # Launching another app listening on a different address
+ # Note: it could be a totally different app, e.g. nc, socat, ...
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP 10.0.2.1 &
+ extra_bind=$!
+
+ bind_addr="10.0.1.1" \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to other addresses
+ if reset "multiple bind to allow joins v6"; then
+ local extra_bind
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+
+ # Launching another app listening on a different address
+ # Note: it could be a totally different app, e.g. nc, socat, ...
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP dead:beef:2::1 &
+ extra_bind=$!
+
+ bind_addr="dead:beef:1::1" \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to other addresses: v6 LL case
+ if reset "multiple bind to allow joins v6 link-local routing"; then
+ local extra_bind ns1ll1 ns1ll2
+
+ ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+ ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+
+ wait_ll_ready $ns1 # to be able to bind
+ wait_ll_ready $ns2 # also needed to bind on the client side
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP "${ns1ll2}%ns1eth2" &
+ extra_bind=$!
+
+ bind_addr="${ns1ll1}%ns1eth1" \
+ run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+ # it is not possible to connect to the announced LL addr without
+ # specifying the outgoing interface.
+ join_connect_err=1 \
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+
+ # multiple binds to allow extra subflows to v6 LL addresses: laminar
+ if reset "multiple bind to allow joins v6 link-local laminar" &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then
+ local extra_bind ns1ll1 ns1ll2 ns2ll2
+
+ ns1ll1="$(get_ll_addr $ns1 ns1eth1)"
+ ns1ll2="$(get_ll_addr $ns1 ns1eth2)"
+ ns2ll2="$(get_ll_addr $ns2 ns2eth2)"
+
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal
+ pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2
+
+ wait_ll_ready $ns1 # to be able to bind
+ wait_ll_ready $ns2 # also needed to bind on the client side
+ ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \
+ -s MPTCP "${ns1ll2}%ns1eth2" &
+ extra_bind=$!
+
+ bind_addr="${ns1ll1}%ns1eth1" \
+ run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1"
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+
+ kill ${extra_bind}
+ fi
+}
+
syncookies_tests()
{
# single subflow, syncookies
@@ -3048,7 +3416,8 @@ syncookies_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 1 1
+ join_syn_rej=1 \
+ chk_join_nr 2 1 1
fi
# test signal address with cookies
@@ -3160,6 +3529,17 @@ deny_join_id0_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
fi
+
+ # default limits, server deny join id 0 + signal
+ if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ fi
}
fullmesh_tests()
@@ -3275,7 +3655,6 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
- MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
@@ -3284,7 +3663,6 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
- MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
join_rst_nr=1 \
@@ -3310,6 +3688,7 @@ fail_tests()
join_csum_ns1=+1 join_csum_ns2=+0 \
join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \
join_corrupted_pkts="$(pedit_action_pkts)" \
+ fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \
chk_join_nr 0 0 0
chk_fail_nr 1 -1 invert
fi
@@ -3532,7 +3911,8 @@ userspace_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# userspace pm type does not send join
@@ -3555,7 +3935,8 @@ userspace_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
chk_prio_nr 0 0 0 0
fi
@@ -3578,7 +3959,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 2 2
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3603,7 +3984,7 @@ userspace_tests()
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm create destroy subflow
@@ -3611,7 +3992,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3631,7 +4012,7 @@ userspace_tests()
chk_mptcp_info subflows 0 subflows 0
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm create id 0 subflow
@@ -3639,7 +4020,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3652,7 +4033,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 2 2
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm remove initial subflow
@@ -3660,7 +4041,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns2
pm_nl_set_limits $ns1 0 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns2
@@ -3676,7 +4057,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
# userspace pm send RM_ADDR for ID 0
@@ -3684,7 +4065,7 @@ userspace_tests()
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
pm_nl_set_limits $ns2 1 1
- { speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
wait_mpj $ns1
@@ -3702,7 +4083,7 @@ userspace_tests()
chk_mptcp_info subflows 1 subflows 1
chk_subflows_total 1 1
kill_events_pids
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
}
@@ -3711,11 +4092,11 @@ endpoint_tests()
# subflow_rebuild_header is needed to support the implicit flag
# userspace pm type prevents add_addr
if reset "implicit EP" &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 2 2
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- { speed=slow \
+ { timeout_test=120 test_linkfail=128 speed=slow \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -3732,17 +4113,17 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
fi
if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
start_events
pm_nl_set_limits $ns1 0 3
pm_nl_set_limits $ns2 0 3
pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
- { test_linkfail=4 speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -3787,7 +4168,7 @@ endpoint_tests()
chk_mptcp_info subflows 3 subflows 3
done
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
kill_events_pids
chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -3812,14 +4193,15 @@ endpoint_tests()
# remove and re-add
if reset_with_events "delete re-add signal" &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0
pm_nl_set_limits $ns1 0 3
pm_nl_set_limits $ns2 3 3
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal
- { test_linkfail=4 speed=5 \
+ { timeout_test=120 test_linkfail=128 speed=5 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -3828,39 +4210,46 @@ endpoint_tests()
$ns1 10.0.2.1 id 1 flags signal
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 1
pm_nl_del_endpoint $ns1 1 10.0.2.1
pm_nl_del_endpoint $ns1 2 224.0.0.1
sleep 0.5
chk_subflow_nr "after delete" 1
chk_mptcp_info subflows 0 subflows 0
+ chk_mptcp_info add_addr_signal 0 add_addr_accepted 0
pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-add" 3
chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_del_endpoint $ns1 42 10.0.1.1
sleep 0.5
chk_subflow_nr "after delete ID 0" 2
chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
+ chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
pm_nl_del_endpoint $ns1 99 10.0.1.1
sleep 0.5
chk_subflow_nr "after re-delete ID 0" 2
chk_mptcp_info subflows 2 subflows 2
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal
wait_mpj $ns2
chk_subflow_nr "after re-re-add ID 0" 3
chk_mptcp_info subflows 3 subflows 3
- mptcp_lib_kill_wait $tests_pid
+ chk_mptcp_info add_addr_signal 3 add_addr_accepted 2
+ mptcp_lib_kill_group_wait $tests_pid
kill_events_pids
chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1
@@ -3886,13 +4275,13 @@ endpoint_tests()
# flush and re-add
if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT &&
- mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 1 2
# broadcast IP: no packet for this address will be received on ns1
pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal
pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow
- { test_linkfail=4 speed=20 \
+ { timeout_test=120 test_linkfail=128 speed=20 \
run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null
local tests_pid=$!
@@ -3908,7 +4297,7 @@ endpoint_tests()
wait_mpj $ns2
pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal
wait_mpj $ns2
- mptcp_lib_kill_wait $tests_pid
+ mptcp_lib_kill_group_wait $tests_pid
join_syn_tx=3 join_connect_err=1 \
chk_join_nr 2 2 2
@@ -3948,6 +4337,7 @@ all_tests_sorted=(
f@subflows_tests
e@subflows_error_tests
s@signal_address_tests
+ L@laminar_endp_tests
l@link_failure_tests
t@add_addr_timeout_tests
r@remove_tests
@@ -3957,6 +4347,7 @@ all_tests_sorted=(
M@mixed_tests
b@backup_tests
p@add_addr_ports_tests
+ B@bind_tests
k@syncookies_tests
S@checksum_tests
d@deny_join_id0_tests
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 975d4d4c862a..5fea7e7df628 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -2,7 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
. "$(dirname "${0}")/../lib.sh"
-. "$(dirname "${0}")/../net_helper.sh"
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
@@ -107,6 +106,36 @@ mptcp_lib_pr_info() {
mptcp_lib_print_info "INFO: ${*}"
}
+mptcp_lib_pr_nstat() {
+ local ns="${1}"
+ local hist="/tmp/${ns}.out"
+
+ if [ -f "${hist}" ]; then
+ awk '$2 != 0 { print " "$0 }' "${hist}"
+ else
+ ip netns exec "${ns}" nstat -as | grep Tcp
+ fi
+}
+
+# $1-2: listener/connector ns ; $3 port
+mptcp_lib_pr_err_stats() {
+ local lns="${1}"
+ local cns="${2}"
+ local port="${3}"
+
+ echo -en "${MPTCP_LIB_COLOR_RED}"
+ {
+ printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
+ ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
+ mptcp_lib_pr_nstat "${lns}"
+
+ printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
+ ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
+ [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}"
+ } 1>&2
+ echo -en "${MPTCP_LIB_COLOR_RESET}"
+}
+
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
@@ -310,12 +339,28 @@ mptcp_lib_result_print_all_tap() {
# get the value of keyword $1 in the line marked by keyword $2
mptcp_lib_get_info_value() {
- grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ grep "${2}" 2>/dev/null |
+ sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ # the ';q' at the end limits to the first matched entry.
}
# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() {
- grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+ grep "${4:-}" "${2}" 2>/dev/null |
+ mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+}
+
+mptcp_lib_wait_timeout() {
+ local timeout_test="${1}"
+ local listener_ns="${2}"
+ local connector_ns="${3}"
+ local port="${4}"
+ shift 4 # rest are PIDs
+
+ sleep "${timeout_test}"
+ mptcp_lib_print_err "timeout"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
+ kill "${@}" 2>/dev/null
}
# $1: PID
@@ -327,19 +372,62 @@ mptcp_lib_kill_wait() {
wait "${1}" 2>/dev/null
}
+# $1: PID
+mptcp_lib_pid_list_children() {
+ local curr="${1}"
+ # evoke 'ps' only once
+ local pids="${2:-"$(ps o pid,ppid)"}"
+
+ echo "${curr}"
+
+ local pid
+ for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do
+ mptcp_lib_pid_list_children "${pid}" "${pids}"
+ done
+}
+
+# $1: PID
+mptcp_lib_kill_group_wait() {
+ # Some users might not have procps-ng: cannot use "kill -- -PID"
+ mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null
+ wait "${1}" 2>/dev/null
+}
+
# $1: IP address
mptcp_lib_is_v6() {
[ -z "${1##*:*}" ]
}
+mptcp_lib_nstat_init() {
+ local ns="${1}"
+
+ rm -f "/tmp/${ns}."{nstat,out}
+ NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n
+}
+
+mptcp_lib_nstat_get() {
+ local ns="${1}"
+
+ # filter out non-*TCP stats, and the rate (last column)
+ NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz |
+ grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out"
+}
+
# $1: ns, $2: MIB counter
+# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available.
+# If not, get the counter from nstat ignoring any history.
mptcp_lib_get_counter() {
local ns="${1}"
local counter="${2}"
+ local hist="/tmp/${ns}.out"
local count
- count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
- awk 'NR==1 {next} {print $2}')
+ if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then
+ count=$(awk "/^${counter} / {print \$2; exit}" "${hist}")
+ else
+ count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+ awk 'NR==1 {next} {print $2}')
+ fi
if [ -z "${count}" ]; then
mptcp_lib_fail_if_expected_feature "${counter} counter"
return 1
@@ -361,7 +449,7 @@ mptcp_lib_make_file() {
mptcp_lib_print_file_err() {
ls -l "${1}" 1>&2
echo "Trailing bytes are: "
- tail -c 27 "${1}"
+ tail -c 32 "${1}" | od -x | head -n2
}
# $1: input file ; $2: output file ; $3: what kind of file
@@ -455,8 +543,6 @@ mptcp_lib_ns_init() {
local netns
for netns in "${@}"; do
ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
done
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..286164f7246e 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -159,13 +159,22 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
- int err = getaddrinfo(node, service, hints, res);
+ int err;
+again:
+ err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -178,7 +187,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -223,7 +232,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
@@ -658,22 +667,26 @@ static void process_one_client(int fd, int pipefd)
do_getsockopts(&s, fd, ret, ret2);
if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
- xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+ xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64,
+ s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1));
/* be nice when running on top of older kernel */
if (s.pkt_stats_avail) {
if (s.last_sample.mptcpi_bytes_sent != ret2)
- xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_sent, ret2,
s.last_sample.mptcpi_bytes_sent - ret2);
if (s.last_sample.mptcpi_bytes_received != ret)
- xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
s.last_sample.mptcpi_bytes_received, ret,
s.last_sample.mptcpi_bytes_received - ret);
if (s.last_sample.mptcpi_bytes_acked != ret)
- xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
- s.last_sample.mptcpi_bytes_acked, ret2,
- s.last_sample.mptcpi_bytes_acked - ret2);
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64
+ ", diff %" PRId64,
+ s.last_sample.mptcpi_bytes_acked, ret,
+ s.last_sample.mptcpi_bytes_acked - ret);
}
close(fd);
@@ -713,6 +726,7 @@ static int server(int pipefd)
process_one_client(r, pipefd);
+ close(fd);
return 0;
}
@@ -838,8 +852,12 @@ int main(int argc, char *argv[])
die_perror("pipe");
s = xfork();
- if (s == 0)
- return server(pipefds[1]);
+ if (s == 0) {
+ close(pipefds[0]);
+ ret = server(pipefds[1]);
+ close(pipefds[1]);
+ return ret;
+ }
close(pipefds[1]);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 5e8d5b83e2d0..ab8bce06b262 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -95,7 +95,7 @@ init()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
@@ -169,34 +169,44 @@ do_transfer()
cmsg+=",TCPINQ"
fi
- timeout ${timeout_test} \
- ip netns exec ${listener_ns} \
- $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
- ${local_addr} < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${listener_ns}"
+ mptcp_lib_nstat_init "${connector_ns}"
+
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+ ${local_addr} < "$sin" > "$sout" &
local spid=$!
- sleep 1
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
- timeout ${timeout_test} \
- ip netns exec ${connector_ns} \
- $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
- $connect_addr < "$cin" > "$cout" &
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+ $connect_addr < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \
+ "${connector_ns}" "${port}" "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
+ mptcp_lib_nstat_get "${listener_ns}"
+ mptcp_lib_nstat_get "${connector_ns}"
+
print_title "Transfer ${ip:2}"
- if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
-
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}"
mptcp_lib_result_fail "transfer ${ip}"
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 2e6648a2b2c0..ec6a87588191 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -32,7 +32,7 @@ ns1=""
err=$(mktemp)
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "${err}"
@@ -70,8 +70,9 @@ format_endpoints() {
mptcp_lib_pm_nl_format_endpoints "${@}"
}
+# This function is invoked indirectly
+#shellcheck disable=SC2317,SC2329
get_endpoint() {
- # shellcheck disable=SC2317 # invoked indirectly
mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
}
@@ -198,6 +199,7 @@ set_limits 1 9 2>/dev/null
check "get_limits" "${default_limits}" "subflows above hard limit"
set_limits 8 8
+flush_endpoint ## to make sure it doesn't affect the limits
check "get_limits" "$(format_limits 8 8)" "set limits"
flush_endpoint
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 994a556f46c1..65b374232ff5 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -2,6 +2,7 @@
#include <errno.h>
#include <error.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -113,6 +114,8 @@ static int capture_events(int fd, int event_group)
error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group");
do {
+ bool server_side = false;
+
FD_ZERO(&rfds);
FD_SET(fd, &rfds);
res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -187,11 +190,22 @@ static int capture_events(int fd, int event_group)
else if (attrs->rta_type == MPTCP_ATTR_ERROR)
fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
- fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+ server_side = !!*(__u8 *)RTA_DATA(attrs);
+ else if (attrs->rta_type == MPTCP_ATTR_FLAGS) {
+ __u16 flags = *(__u16 *)RTA_DATA(attrs);
+
+ /* only print when present, easier */
+ if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0)
+ fprintf(stderr, ",deny_join_id0:1");
+ if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE)
+ server_side = true;
+ }
attrs = RTA_NEXT(attrs, msg_len);
}
}
+ if (server_side)
+ fprintf(stderr, ",server_side:1");
fprintf(stderr, "\n");
} while (1);
@@ -816,6 +830,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW;
else if (!strcmp(tok, "signal"))
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
+ else if (!strcmp(tok, "laminar"))
+ flags |= MPTCP_PM_ADDR_FLAG_LAMINAR;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
else if (!strcmp(tok, "fullmesh"))
@@ -1004,6 +1020,13 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) {
+ printf("laminar");
+ flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR;
+ if (flags)
+ printf(",");
+ }
+
if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) {
printf("backup");
flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 8fa77c8e9b65..806aaa7d2d61 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -28,14 +28,14 @@ size=0
usage() {
echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
- echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-b: bail out after first error, otherwise runs all testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
rm -f "$cout" "$sout"
@@ -155,50 +155,61 @@ do_transfer()
sleep 1
fi
- timeout ${timeout_test} \
- ip netns exec ${ns3} \
- ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
- 0.0.0.0 < "$sin" > "$sout" &
+ mptcp_lib_nstat_init "${ns3}"
+ mptcp_lib_nstat_init "${ns1}"
+
+ ip netns exec ${ns3} \
+ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+ 0.0.0.0 < "$sin" > "$sout" &
local spid=$!
mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
- timeout ${timeout_test} \
- ip netns exec ${ns1} \
- ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
- 10.0.3.3 < "$cin" > "$cout" &
+ ip netns exec ${ns1} \
+ ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+ 10.0.3.3 < "$cin" > "$cout" &
local cpid=$!
+ mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \
+ "${cpid}" "${spid}" &
+ local timeout_pid=$!
+
wait $cpid
local retc=$?
wait $spid
local rets=$?
+ if kill -0 $timeout_pid; then
+ # Finished before the timeout: kill the background job
+ mptcp_lib_kill_group_wait $timeout_pid
+ timeout_pid=0
+ fi
+
if $capture; then
sleep 1
kill ${cappid_listener}
kill ${cappid_connector}
fi
+ mptcp_lib_nstat_get "${ns3}"
+ mptcp_lib_nstat_get "${ns1}"
+
cmp $sin $cout > /dev/null 2>&1
local cmps=$?
cmp $cin $sout > /dev/null 2>&1
local cmpc=$?
- printf "%-16s" " max $max_time "
- if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
- [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] &&
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] &&
+ [ $timeout_pid -eq 0 ]; then
+ printf "%-16s" " max $max_time "
mptcp_lib_pr_ok
cat "$capout"
return 0
fi
- mptcp_lib_pr_fail
- echo "client exit code $retc, server $rets" 1>&2
- echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
- ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
- echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
- ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
+ mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}"
ls -l $sin $cout
ls -l $cin $sout
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 3651f73451cf..e9ae1806ab07 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -94,7 +94,7 @@ test_fail()
}
# This function is used in the cleanup trap
-#shellcheck disable=SC2317
+#shellcheck disable=SC2317,SC2329
cleanup()
{
print_title "Cleanup"
@@ -117,7 +117,36 @@ cleanup()
trap cleanup EXIT
# Create and configure network namespaces for testing
+print_title "Init"
mptcp_lib_ns_init ns1 ns2
+
+# check path_manager and pm_type sysctl mapping
+if [ -f /proc/sys/net/mptcp/path_manager ]; then
+ ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace
+ pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+ if [ "${pm_type}" != "1" ]; then
+ test_fail "unexpected pm_type: ${pm_type}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null
+ pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)"
+ if [ "${pm_type}" != "1" ]; then
+ test_fail "unexpected pm_type after error: ${pm_type}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0
+ pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)"
+ if [ "${pm_name}" != "kernel" ]; then
+ test_fail "unexpected path-manager: ${pm_name}"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+fi
+
for i in "$ns1" "$ns2" ;do
ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
done
@@ -152,7 +181,6 @@ mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
sleep 0.5
mptcp_lib_subtests_last_ts_reset
-print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
@@ -173,6 +201,9 @@ make_connection()
is_v6="v4"
fi
+ # set this on the client side only: will not affect the rest
+ ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0
+
:>"$client_evts"
:>"$server_evts"
@@ -180,7 +211,8 @@ make_connection()
ip netns exec "$ns1" \
./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
local server_pid=$!
- sleep 0.5
+
+ mptcp_lib_wait_local_port_listen "${ns1}" "${port}"
# Run the client, transfer $file and stay connected to the server
# to conduct tests
@@ -195,23 +227,28 @@ make_connection()
local client_token
local client_port
local client_serverside
+ local client_nojoin
local server_token
local server_serverside
+ local server_nojoin
client_token=$(mptcp_lib_evts_get_info token "$client_evts")
client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts")
server_token=$(mptcp_lib_evts_get_info token "$server_evts")
server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+ server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
- if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
- [ "$server_serverside" = 1 ]
+ if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] &&
+ [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] &&
+ [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ]
then
test_pass
print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
- test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+ test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})"
mptcp_lib_result_print_all_tap
exit ${KSFT_FAIL}
fi
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 7ea5fb28c93d..1d5d3c4e7e87 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -77,6 +77,7 @@
static int cfg_cork;
static bool cfg_cork_mixed;
static int cfg_cpu = -1; /* default: pin to last cpu */
+static int cfg_expect_zerocopy = -1;
static int cfg_family = PF_UNSPEC;
static int cfg_ifindex = 1;
static int cfg_payload_len;
@@ -92,9 +93,9 @@ static socklen_t cfg_alen;
static struct sockaddr_storage cfg_dst_addr;
static struct sockaddr_storage cfg_src_addr;
+static int exitcode;
static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
-static int zerocopied = -1;
static uint32_t next_completion;
static uint32_t sends_since_notify;
@@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain)
next_completion = hi + 1;
zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED);
- if (zerocopied == -1)
- zerocopied = zerocopy;
- else if (zerocopied != zerocopy) {
- fprintf(stderr, "serr: inconsistent\n");
- zerocopied = zerocopy;
+ if (cfg_expect_zerocopy != -1 &&
+ cfg_expect_zerocopy != zerocopy) {
+ fprintf(stderr, "serr: ee_code: %u != expected %u\n",
+ zerocopy, cfg_expect_zerocopy);
+ exitcode = 1;
+ /* suppress repeated messages */
+ cfg_expect_zerocopy = zerocopy;
}
if (cfg_verbose >= 2)
@@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol)
fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n",
packets, bytes >> 20, completions,
- zerocopied == 1 ? 'y' : 'n');
+ cfg_zerocopy && cfg_expect_zerocopy == 1 ? 'y' : 'n');
}
static int do_setup_rx(int domain, int type, int protocol)
@@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ case 'Z':
+ cfg_expect_zerocopy = !!atoi(optarg);
+ break;
}
}
@@ -817,5 +823,5 @@ int main(int argc, char **argv)
else
error(1, 0, "unknown cfg_test %s", cfg_test);
- return 0;
+ return exitcode;
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 89c22f5320e0..28178a38a4e7 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -6,6 +6,7 @@
set -e
readonly DEV="veth0"
+readonly DUMMY_DEV="dummy0"
readonly DEV_MTU=65535
readonly BIN="./msg_zerocopy"
@@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}"
readonly NS1="${NSPREFIX}1"
readonly NS2="${NSPREFIX}2"
-readonly SADDR4='192.168.1.1'
-readonly DADDR4='192.168.1.2'
-readonly SADDR6='fd::1'
-readonly DADDR6='fd::2'
+readonly LPREFIX4='192.168.1'
+readonly RPREFIX4='192.168.2'
+readonly LPREFIX6='fd'
+readonly RPREFIX6='fc'
+
readonly path_sysctl_mem="net.core.optmem_max"
# No arguments: automated test
if [[ "$#" -eq "0" ]]; then
- $0 4 tcp -t 1
- $0 6 tcp -t 1
- $0 4 udp -t 1
- $0 6 udp -t 1
- echo "OK. All tests passed"
- exit 0
+ ret=0
+
+ $0 4 tcp -t 1 || ret=1
+ $0 6 tcp -t 1 || ret=1
+ $0 4 udp -t 1 || ret=1
+ $0 6 udp -t 1 || ret=1
+
+ [[ "$ret" == "0" ]] && echo "OK. All tests passed"
+ exit $ret
fi
# Argument parsing
@@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@"
# Argument parsing: configure addresses
if [[ "${IP}" == "4" ]]; then
- readonly SADDR="${SADDR4}"
- readonly DADDR="${DADDR4}"
+ readonly SADDR="${LPREFIX4}.1"
+ readonly DADDR="${LPREFIX4}.2"
+ readonly DUMMY_ADDR="${RPREFIX4}.1"
+ readonly DADDR_TXONLY="${RPREFIX4}.2"
+ readonly MASK="24"
elif [[ "${IP}" == "6" ]]; then
- readonly SADDR="${SADDR6}"
- readonly DADDR="${DADDR6}"
+ readonly SADDR="${LPREFIX6}::1"
+ readonly DADDR="${LPREFIX6}::2"
+ readonly DUMMY_ADDR="${RPREFIX6}::1"
+ readonly DADDR_TXONLY="${RPREFIX6}::2"
+ readonly MASK="64"
+ readonly NODAD="nodad"
else
echo "Invalid IP version ${IP}"
exit 1
@@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy
+
# Bring the devices up
ip -netns "${NS1}" link set "${DEV}" up
ip -netns "${NS2}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DUMMY_DEV}" up
# Set fixed MAC addresses on the devices
ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
# Add fixed IP addresses to the devices
-ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
-ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
-ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
-ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD}
+ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD}
+
+ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}"
+ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}"
+
+ip netns exec "${NS2}" sysctl -wq net.ipv4.ip_forward=1
+ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1
# Optionally disable sg or csum offload to test edge cases
# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+ret=0
+
do_test() {
local readonly ARGS="$1"
- echo "ipv${IP} ${TXMODE} ${ARGS}"
- ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
+ # tx-rx test
+ # packets queued to a local socket are copied,
+ # sender notification has SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n"
+ ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" &
sleep 0.2
- ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1
wait
+
+ # next test is unconnected tx to dummy0, cannot exercise with tcp
+ [[ "${TXMODE}" == "tcp" ]] && return
+
+ # tx-only test: send out dummy0
+ # packets leaving the host are not copied,
+ # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED.
+
+ echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n"
+ ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \
+ -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1
}
do_test "${EXTRA_ARGS}"
do_test "-z ${EXTRA_ARGS}"
-echo ok
+
+[[ "$ret" == "0" ]] && echo "OK"
diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh
new file mode 100755
index 000000000000..0ee859b622a4
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS="ns-peer-$(mktemp -u XXXXXX)"
+
+ip netns add "${NS}"
+ip -netns "${NS}" link set lo up
+ip -netns "${NS}" route add default via 127.0.0.2 dev lo
+
+tc -n "${NS}" qdisc add dev lo ingress
+tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \
+ bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
+
+ip netns exec "${NS}" \
+ bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1'
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
deleted file mode 100644
index 6596fe03c77f..000000000000
--- a/tools/testing/selftests/net/net_helper.sh
+++ /dev/null
@@ -1,25 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Helper functions
-
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
- local protocol="${3}"
- local pattern
- local i
-
- pattern=":$(printf "%04X" "${port}") "
-
- # for tcp protocol additionally check the socket state
- [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
- for i in $(seq 10); do
- if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
- /proc/net/"${protocol}"* | grep -q "${pattern}"; then
- break
- fi
- sleep 0.1
- done
-}
diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh
new file mode 100755
index 000000000000..18509da293e5
--- /dev/null
+++ b/tools/testing/selftests/net/netdev-l2addr.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+NSIM_ADDR=2025
+TEST_ADDR="d0:be:d0:be:d0:00"
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_netdevsim "$NSIM_ADDR"
+ cleanup_ns "$NS"
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+get_addr()
+{
+ local type="$1"
+ local dev="$2"
+ local ns="$3"
+
+ ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type"
+}
+
+setup_ns NS
+
+nsim=$(create_netdevsim $NSIM_ADDR "$NS")
+
+get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr"
+get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it"
+
+ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR"
+ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR"
+
+[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr"
+[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr"
+
+if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then
+ fail "Created netdevsim with broadcast permaddr"
+fi
+
+nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR")
+
+get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr"
+get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr"
+[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr"
+
+cleanup_netdevsim "$NSIM_ADDR" "$NS"
+
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index 64c4f8d9aa6c..5d2be9a00627 100644
--- a/tools/testing/selftests/net/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -5,3 +5,4 @@ conntrack_dump_flush
conntrack_reverse_clash
sctp_collision
nf_queue
+udpclash
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..ee2d1a5254f8 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -6,42 +6,52 @@ HOSTPKG_CONFIG := pkg-config
MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
-TEST_PROGS := br_netfilter.sh bridge_brouter.sh
-TEST_PROGS += br_netfilter_queue.sh
-TEST_PROGS += conntrack_dump_flush.sh
-TEST_PROGS += conntrack_icmp_related.sh
-TEST_PROGS += conntrack_ipip_mtu.sh
-TEST_PROGS += conntrack_tcp_unreplied.sh
-TEST_PROGS += conntrack_sctp_collision.sh
-TEST_PROGS += conntrack_vrf.sh
-TEST_PROGS += conntrack_reverse_clash.sh
-TEST_PROGS += ipvs.sh
-TEST_PROGS += nf_conntrack_packetdrill.sh
-TEST_PROGS += nf_nat_edemux.sh
-TEST_PROGS += nft_audit.sh
-TEST_PROGS += nft_concat_range.sh
-TEST_PROGS += nft_conntrack_helper.sh
-TEST_PROGS += nft_fib.sh
-TEST_PROGS += nft_flowtable.sh
-TEST_PROGS += nft_meta.sh
-TEST_PROGS += nft_nat.sh
-TEST_PROGS += nft_nat_zones.sh
-TEST_PROGS += nft_queue.sh
-TEST_PROGS += nft_synproxy.sh
-TEST_PROGS += nft_tproxy_tcp.sh
-TEST_PROGS += nft_tproxy_udp.sh
-TEST_PROGS += nft_zones_many.sh
-TEST_PROGS += rpath.sh
-TEST_PROGS += vxlan_mtu_frag.sh
-TEST_PROGS += xt_string.sh
+TEST_PROGS := \
+ br_netfilter.sh \
+ br_netfilter_queue.sh \
+ bridge_brouter.sh \
+ conntrack_clash.sh \
+ conntrack_dump_flush.sh \
+ conntrack_icmp_related.sh \
+ conntrack_ipip_mtu.sh \
+ conntrack_resize.sh \
+ conntrack_reverse_clash.sh \
+ conntrack_sctp_collision.sh \
+ conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh \
+ ipvs.sh \
+ nf_conntrack_packetdrill.sh \
+ nf_nat_edemux.sh \
+ nft_audit.sh \
+ nft_concat_range.sh \
+ nft_conntrack_helper.sh \
+ nft_fib.sh \
+ nft_flowtable.sh \
+ nft_interface_stress.sh \
+ nft_meta.sh \
+ nft_nat.sh \
+ nft_nat_zones.sh \
+ nft_queue.sh \
+ nft_synproxy.sh \
+ nft_tproxy_tcp.sh \
+ nft_tproxy_udp.sh \
+ nft_zones_many.sh \
+ rpath.sh \
+ vxlan_mtu_frag.sh \
+ xt_string.sh \
+# end of TEST_PROGS
TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
-TEST_GEN_FILES = audit_logread
-TEST_GEN_FILES += connect_close nf_queue
-TEST_GEN_FILES += conntrack_dump_flush
-TEST_GEN_FILES += conntrack_reverse_clash
-TEST_GEN_FILES += sctp_collision
+TEST_GEN_FILES = \
+ audit_logread \
+ connect_close \
+ conntrack_dump_flush \
+ conntrack_reverse_clash \
+ nf_queue \
+ sctp_collision \
+ udpclash \
+# end of TEST_GEN_FILES
include ../../lib.mk
@@ -50,10 +60,14 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+$(OUTPUT)/udpclash: LDLIBS += -lpthread
-TEST_FILES := lib.sh
-TEST_FILES += packetdrill
+TEST_FILES := \
+ lib.sh \
+ packetdrill \
+# end of TEST_FILES
TEST_INCLUDES := \
+ $(wildcard ../lib/sh/*.sh) \
../lib.sh \
- $(wildcard ../lib/sh/*.sh)
+# end of TEST_INCLUDES
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
index c28379a965d8..011de8763094 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -13,6 +13,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -54,9 +60,6 @@ bcast_ping()
done
}
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
-
if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
@@ -165,6 +168,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
index 6a764d70ab06..4788641717d9 100755
--- a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
+++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh
@@ -4,6 +4,12 @@ source lib.sh
checktool "nft --version" "run test without nft tool"
+read t < /proc/sys/kernel/tainted
+if [ "$t" -ne 0 ];then
+ echo SKIP: kernel is tainted
+ exit $ksft_skip
+fi
+
cleanup() {
cleanup_all_ns
}
@@ -72,6 +78,7 @@ if [ "$t" -eq 0 ];then
echo PASS: kernel not tainted
else
echo ERROR: kernel is tainted
+ dmesg
exit 1
fi
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
index 2549b6590693..ea76f2bc2f59 100755
--- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -22,8 +22,6 @@ trap cleanup EXIT
setup_ns nsbr ns1 ns2
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
-ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
echo "SKIP: Can't create veth device"
exit $ksft_skip
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..12ce61fa15a8 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -7,64 +7,74 @@ CONFIG_BRIDGE_EBT_REDIRECT=m
CONFIG_BRIDGE_EBT_T_FILTER=m
CONFIG_BRIDGE_NETFILTER=m
CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m
CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_CGROUP_BPF=y
+CONFIG_CRYPTO_SHA1=m
CONFIG_DUMMY=m
+CONFIG_INET_DIAG=m
CONFIG_INET_ESP=m
-CONFIG_IP_NF_MATCH_RPFILTER=m
-CONFIG_IP6_NF_MATCH_RPFILTER=m
-CONFIG_IP_NF_IPTABLES=m
+CONFIG_INET_SCTP_DIAG=m
+CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES_LEGACY=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_RAW=m
CONFIG_IP_NF_FILTER=m
-CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP_NF_IPTABLES_LEGACY=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_RAW=m
-CONFIG_IP6_NF_RAW=m
CONFIG_IP_SCTP=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_RR=m
-CONFIG_IPV6=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_MACVLAN=m
CONFIG_NAMESPACES=y
CONFIG_NET_CLS_U32=m
-CONFIG_NET_L3_MASTER_DEV=y
-CONFIG_NET_NS=y
-CONFIG_NET_SCH_NETEM=m
-CONFIG_NET_SCH_HTB=m
-CONFIG_NET_IPIP=m
-CONFIG_NET_VRF=y
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NETFILTER_NETLINK=m
CONFIG_NETFILTER_NETLINK_QUEUE=m
CONFIG_NETFILTER_SYNPROXY=m
CONFIG_NETFILTER_XTABLES=m
-CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_NAT=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_VRF=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_PROTO_SCTP=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NF_LOG_IPV4=m
CONFIG_NF_LOG_IPV6=m
CONFIG_NF_NAT=m
-CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT_REDIRECT=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_BRIDGE=m
CONFIG_NF_TABLES_INET=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_NETDEV=y
-CONFIG_NF_FLOW_TABLE_INET=m
CONFIG_NFT_BRIDGE_META=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_CT=m
@@ -83,12 +93,9 @@ CONFIG_NFT_QUOTA=m
CONFIG_NFT_REDIR=m
CONFIG_NFT_SYNPROXY=m
CONFIG_NFT_TPROXY=m
+CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VLAN_8021Q=m
CONFIG_VXLAN=m
-CONFIG_XFRM_USER=m
CONFIG_XFRM_STATISTICS=y
-CONFIG_NET_PKTGEN=m
-CONFIG_TUN=m
-CONFIG_INET_DIAG=m
-CONFIG_SCTP_DIAG=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
new file mode 100755
index 000000000000..7fc6c5dbd551
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+clash_resolution_active=0
+dport=22111
+ret=0
+
+cleanup()
+{
+ # netns cleanup also zaps any remaining socat echo server.
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter
+
+ip netns exec "$nsrouter" nft -f -<<EOF
+table ip t {
+ chain lb {
+ meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 9002 }
+ }
+
+ chain prerouting {
+ type nat hook prerouting priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+
+ chain output {
+ type nat hook output priority dstnat
+
+ udp dport $dport counter jump lb
+ }
+}
+EOF
+
+load_simple_ruleset()
+{
+ip netns exec "$1" nft -f -<<EOF
+table ip t {
+ chain forward {
+ type filter hook forward priority 0
+
+ ct state new counter
+ }
+}
+EOF
+}
+
+spawn_servers()
+{
+ local ns="$1"
+ local ports="9000 9001 9002"
+
+ for port in $ports; do
+ ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null &
+ done
+
+ for port in $ports; do
+ wait_local_port_listen "$ns" $port udp
+ done
+}
+
+add_addr()
+{
+ local ns="$1"
+ local dev="$2"
+ local i="$3"
+ local j="$4"
+
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev"
+}
+
+ping_test()
+{
+ local ns="$1"
+ local daddr="$2"
+
+ if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then
+ echo "FAIL: ping from $ns to $daddr"
+ exit 1
+ fi
+}
+
+run_one_clash_test()
+{
+ local ns="$1"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local entries
+ local cre
+
+ if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then
+ echo "INFO: did not receive expected number of replies for $daddr:$dport"
+ ip netns exec "$ctns" conntrack -S
+ # don't fail: check if clash resolution triggered after all.
+ fi
+
+ entries=$(ip netns exec "$ctns" conntrack -S | wc -l)
+ cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l)
+
+ if [ "$cre" -ne "$entries" ];then
+ clash_resolution_active=1
+ return 0
+ fi
+
+ # not a failure: clash resolution logic did not trigger.
+ # With right timing, xmit completed sequentially and
+ # no parallel insertion occurs.
+ return $ksft_skip
+}
+
+run_clash_test()
+{
+ local ns="$1"
+ local ctns="$2"
+ local daddr="$3"
+ local dport="$4"
+ local softerr=0
+
+ for i in $(seq 1 10);do
+ run_one_clash_test "$ns" "$ctns" "$daddr" "$dport"
+ local rv=$?
+ if [ $rv -eq 0 ];then
+ echo "PASS: clash resolution test for $daddr:$dport on attempt $i"
+ return 0
+ elif [ $rv -eq $ksft_skip ]; then
+ softerr=1
+ fi
+ done
+
+ [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger"
+}
+
+ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter"
+ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter"
+add_addr "$nsclient1" veth0 1 1
+add_addr "$nsclient2" veth0 2 1
+add_addr "$nsrouter" veth0 1 99
+add_addr "$nsrouter" veth1 2 99
+
+ip -net "$nsclient1" route add default via 10.0.1.99
+ip -net "$nsclient2" route add default via 10.0.2.99
+ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1
+
+ping_test "$nsclient1" 10.0.1.99
+ping_test "$nsclient1" 10.0.2.1
+ping_test "$nsclient2" 10.0.1.1
+
+spawn_servers "$nsclient2"
+
+# exercise clash resolution with nat:
+# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2,3}.
+run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport"
+
+# exercise clash resolution without nat.
+load_simple_ruleset "$nsclient2"
+run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001
+
+if [ $clash_resolution_active -eq 0 ];then
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ echo "SKIP: Clash resolution did not trigger"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
index 5f827e10717d..5cecb8a1bc94 100644
--- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -10,7 +10,7 @@
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
-#include "../../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TEST_ZONE_ID 123
#define NF_CT_DEFAULT_ZONE_ID 0
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..615fe3c6f405
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,515 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+tmpfile_proc=""
+tmpfile_uniq=""
+ret=0
+have_socat=0
+
+socat -h > /dev/null && have_socat=1
+
+insert_count=2000
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400
+
+modprobe -q nf_conntrack
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then
+ echo "SKIP: conntrack sysctls not available"
+ exit $KSFT_SKIP
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+ cleanup_all_ns
+
+ rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq"
+
+ # restore original sysctl setting
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+ sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+ local expected="$1"
+ # old name, expected to alias to the first, i.e. changing one
+ # changes the other as well.
+ local lv=$(sysctl -n net.nf_conntrack_max)
+
+ if [ $expected -ne "$lv" ];then
+ echo "nf_conntrack_max sysctls should have identical values"
+ exit 1
+ fi
+}
+
+insert_ctnetlink() {
+ local ns="$1"
+ local count="$2"
+ local i=0
+ local bulk=16
+
+ while [ $i -lt $count ] ;do
+ ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+ if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+ return;\
+ fi & \
+ done ; wait" 2>/dev/null
+
+ i=$((i+bulk))
+ done
+}
+
+check_ctcount() {
+ local ns="$1"
+ local count="$2"
+ local msg="$3"
+
+ local now=$(ip netns exec "$ns" conntrack -C)
+
+ if [ $now -ne "$count" ] ;then
+ echo "expected $count entries in $ns, not $now: $msg"
+ exit 1
+ fi
+
+ echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+ local duration="$1"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+ now=$(date +%s)
+ done
+}
+
+do_rsleep() {
+ local limit="$1"
+ local r=$RANDOM
+
+ r=$((r%limit))
+ sleep "$r"
+}
+
+ct_flush_once() {
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ do_rsleep "$duration"
+
+ while [ $now -lt $end ]; do
+ ct_flush_once "$ns"
+ do_rsleep "$duration"
+ now=$(date +%s)
+ done
+}
+
+ct_pingflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local msg="$3"
+ local now=$(date +%s)
+ local end=$((now + duration))
+ local j=0
+ local k=0
+
+ while [ $now -lt $end ]; do
+ j=$((j%256))
+ k=$((k%256))
+
+ ip netns exec "$ns" bash -c \
+ "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+ j=$((j+1))
+
+ if [ $j -eq 256 ];then
+ k=$((k+1))
+ fi
+
+ now=$(date +%s)
+ done
+
+ wait
+}
+
+ct_udpflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ $have_socat -ne "1" ] && return
+
+ while [ $now -lt $end ]; do
+ip netns exec "$ns" bash<<"EOF"
+ for i in $(seq 1 100);do
+ dport=$(((RANDOM%65536)+1))
+
+ echo bar | socat -u STDIN UDP:"127.0.0.1:$dport" &
+ done > /dev/null 2>&1
+ wait
+EOF
+ now=$(date +%s)
+ done
+}
+
+ct_udpclash()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ [ -x udpclash ] || return
+
+ while [ $now -lt $end ]; do
+ ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1
+
+ now=$(date +%s)
+ done
+}
+
+# dump to /dev/null. We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+ # Don't require /proc support in conntrack
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+ fi
+
+ wait
+}
+
+ct_nulldump_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ ct_nulldump "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+}
+
+change_timeouts()
+{
+ local ns="$1"
+ local r1=$((RANDOM%2))
+ local r2=$((RANDOM%2))
+
+ [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5))
+ [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5))
+}
+
+ct_change_timeouts_loop()
+{
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ change_timeouts "$ns"
+ sleep $((RANDOM%2))
+ now=$(date +%s)
+ done
+
+ # restore defaults
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30
+ ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30
+}
+
+check_taint()
+{
+ local tainted_then="$1"
+ local msg="$2"
+
+ local tainted_now=0
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "TAINT: $msg"
+ dmesg
+ exit 1
+ fi
+}
+
+insert_flood()
+{
+ local n="$1"
+ local timeout="$2"
+ local r=0
+
+ r=$((RANDOM%$insert_count))
+
+ ct_pingflood "$n" "$timeout" "floodresize" &
+ ct_udpflood "$n" "$timeout" &
+ ct_udpclash "$n" "$timeout" &
+
+ insert_ctnetlink "$n" "$r" &
+ ctflush "$n" "$timeout" &
+ ct_nulldump_loop "$n" "$timeout" &
+ ct_change_timeouts_loop "$n" "$timeout" &
+
+ wait
+}
+
+test_floodresize_all()
+{
+ local timeout=20
+ local n=""
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ for n in "$nsclient1" "$nsclient2";do
+ insert_flood "$n" "$timeout" &
+ done
+
+ # resize table constantly while flood/insert/dump/flushs
+ # are happening in parallel.
+ ctresize "$timeout"
+
+ # wait for subshells to complete, everything is limited
+ # by $timeout.
+ wait
+
+ check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+ local ns="$1"
+ local protoname="$2"
+ local c=0
+ local proto=0
+ local proc=0
+ local unique=""
+ local lret=0
+
+ # NOTE: assumes timeouts are large enough to not have
+ # expirations in all following tests.
+ l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l)
+ c=$(ip netns exec "$ns" conntrack -C)
+
+ if [ "$c" -eq 0 ]; then
+ echo "FAIL: conntrack count for $ns is 0"
+ lret=1
+ fi
+
+ if [ "$c" -ne "$l" ]; then
+ echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l"
+ lret=1
+ fi
+
+ # check the dump we retrieved is free of duplicated entries.
+ unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$unique" ]; then
+ echo "FAIL: listing contained redundant entries for $ns: $l != $unique"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+ proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proto" ]; then
+ echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto"
+ diff -u "$tmpfile" "$tmpfile_uniq"
+ lret=1
+ fi
+
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l")
+
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency for $ns: $l != $proc"
+ lret=1
+ fi
+
+ proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l)
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+ diff -u "$tmpfile_proc" "$tmpfile_uniq"
+ lret=1
+ fi
+ fi
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ else
+ echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+ ret=1
+ fi
+}
+
+test_dump_all()
+{
+ local timeout=3
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600
+
+ ct_pingflood "$nsclient1" $timeout "dumpall" &
+ insert_ctnetlink "$nsclient2" $insert_count
+
+ wait
+
+ check_dump "$nsclient1" "icmp"
+ check_dump "$nsclient2" "udp"
+
+ check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+ local ns="$1"
+ local name="$2"
+ local failhard="$3"
+ local o=0
+ local n=0
+
+ o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+ n=$((o+1))
+
+ # return value isn't reliable, need to read it back
+ ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null
+
+ n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+
+ [ -z "$n" ] && return 1
+
+ if [ $o -ne $n ]; then
+ if [ $failhard -gt 0 ] ;then
+ echo "FAIL: net.$name should not be changeable from namespace (now $n)"
+ ret=1
+ fi
+ return 0
+ fi
+
+ return 1
+}
+
+test_conntrack_max_limit()
+{
+ sysctl -q net.netfilter.nf_conntrack_max=100
+ insert_ctnetlink "$nsclient1" 101
+
+ # check netns is clamped by init_net, i.e., either netns follows
+ # init_net value, or a higher pernet limit (compared to init_net) is ignored.
+ check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+ local timeout=2
+
+ # disable conntrack pickups
+ ip netns exec "$nsclient1" nft flush table ip test_ct
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ct_pingflood "$nsclient1" "$timeout" "conntrack disable"
+ ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+ # Disabled, should not have picked up any connection.
+ check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+ # This one is still active, expect 1 connection.
+ check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# check this only works from init_net
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do
+ check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+ # subtest: if pernet is changeable, check that reducing it in pernet
+ # limits the pernet entries. Inverse, pernet clamped by a lower init_net
+ # setting, is already checked by "test_conntrack_max_limit" test.
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+ insert_ctnetlink "$nsclient1" 2
+ check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+ chain input {
+ type filter hook input priority 0
+ ct state new counter
+ }
+}
+EOF
+done
+
+tmpfile=$(mktemp)
+tmpfile_proc=$(mktemp)
+tmpfile_uniq=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index e95ecb37c2b1..207b79932d91 100755
--- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -32,7 +32,6 @@ source lib.sh
IP0=172.30.30.1
IP1=172.30.30.2
-DUMMYNET=10.9.9
PFXL=30
ret=0
@@ -52,11 +51,6 @@ trap cleanup EXIT
setup_ns ns0 ns1
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.forwarding=1
-
if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
@@ -67,18 +61,13 @@ if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
exit $ksft_skip
fi
-ip -net "$ns0" link add dummy0 type dummy
-
ip -net "$ns0" li set veth0 master tvrf
-ip -net "$ns0" li set dummy0 master tvrf
ip -net "$ns0" li set tvrf up
ip -net "$ns0" li set veth0 up
-ip -net "$ns0" li set dummy0 up
ip -net "$ns1" li set veth0 up
ip -net "$ns0" addr add $IP0/$PFXL dev veth0
ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net "$ns0" addr add $DUMMYNET.1/$PFXL dev dummy0
listener_ready()
{
@@ -219,35 +208,9 @@ EOF
fi
}
-test_fib()
-{
-ip netns exec "$ns0" nft -f - <<EOF
-flush ruleset
-table ip t {
- counter fibcount { }
-
- chain prerouting {
- type filter hook prerouting priority 0;
- meta iifname veth0 ip daddr $DUMMYNET.2 fib daddr oif dummy0 counter name fibcount notrack
- }
-}
-EOF
- ip -net "$ns1" route add 10.9.9.0/24 via "$IP0" dev veth0
- ip netns exec "$ns1" ping -q -w 1 -c 1 "$DUMMYNET".2 > /dev/null
-
- if ip netns exec "$ns0" nft list counter t fibcount | grep -q "packets 1"; then
- echo "PASS: fib lookup returned exepected output interface"
- else
- echo "FAIL: fib lookup did not return exepected output interface"
- ret=1
- return
- fi
-}
-
test_ct_zone_in
test_masquerade_vrf "default"
test_masquerade_vrf "pfifo"
test_masquerade_veth
-test_fib
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
index d3edb16cd4b3..9c9d5b38ab71 100755
--- a/tools/testing/selftests/net/netfilter/ipvs.sh
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -129,9 +129,6 @@ test_dr() {
# avoid incorrect arp response
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- # avoid reverse route lookup
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
@@ -154,7 +151,7 @@ test_nat() {
test_tun() {
ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
- ip netns exec "${ns1}" modprobe -q ipip
+ modprobe -q ipip
ip netns exec "${ns1}" ip link set tunl0 up
ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
@@ -163,13 +160,10 @@ test_tun() {
ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
- ip netns exec "${ns2}" modprobe -q ipip
ip netns exec "${ns2}" ip link set tunl0 up
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
test_service
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
index 1014551dd769..6731fe1eaf2e 100755
--- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -17,9 +17,31 @@ cleanup()
checktool "socat -h" "run test without socat"
checktool "iptables --version" "run test without iptables"
+checktool "conntrack --version" "run test without conntrack"
trap cleanup EXIT
+connect_done()
+{
+ local ns="$1"
+ local port="$2"
+
+ ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port"
+}
+
+check_ctstate()
+{
+ local ns="$1"
+ local dp="$2"
+
+ if ! ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \
+ --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1;then
+ echo "FAIL: Did not find expected state for dport $2"
+ ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt'
+ ret=1
+ fi
+}
+
setup_ns ns1 ns2
# Connect the namespaces using a veth pair
@@ -44,15 +66,18 @@ socatpid=$!
ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
# add a virtual IP using DNAT
-ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1
# ... and route it to the other namespace
ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
-# add a persistent connection from the other namespace
-ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+# listener should be up by now, wait if it isn't yet.
+wait_local_port_listen "$ns1" 5201 tcp
-sleep 1
+# add a persistent connection from the other namespace
+sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+cpid0=$!
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201"
# ip daddr:dport will be rewritten to 192.168.1.1 5201
# NAT must reallocate source port 10000 because
@@ -71,26 +96,25 @@ fi
ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
-sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
-# if connect succeeds, client closes instantly due to EOF on stdin.
-# if connect hangs, it will time out after 5s.
-echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
cpid2=$!
-time_then=$(date +%s)
-wait $cpid2
-rv=$?
-time_now=$(date +%s)
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202
+busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203
-# Check how much time has elapsed, expectation is for
-# 'cpid2' to connect and then exit (and no connect delay).
-delta=$((time_now - time_then))
+check_ctstate "$ns1" 5202
+check_ctstate "$ns1" 5203
-if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+kill $socatpid $cpid0 $cpid1 $cpid2
+socatpid=0
+
+if [ $ret -eq 0 ]; then
echo "PASS: could connect to service via redirected ports"
else
- echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ echo "FAIL: socat cannot connect to service via redirect"
ret=1
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index 47088b005390..ad97c6227f35 100755
--- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -15,10 +15,12 @@ source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
# - correctness: check that packets match given entries, and only those
+# - correctness_large: same but with additional non-matching entries
# - concurrency: attempt races between insertion, deletion and lookup
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
-TESTS="reported_issues correctness concurrency timeout"
+TESTS="reported_issues correctness correctness_large concurrency timeout"
+
[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
@@ -27,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload net_port_proto_match"
+BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
@@ -376,7 +378,7 @@ display net,port,proto
type_spec ipv4_addr . inet_service . inet_proto
chain_spec ip daddr . udp dport . meta l4proto
dst addr4 port proto
-src
+src
start 1
count 9
src_delta 9
@@ -387,6 +389,37 @@ race_repeat 0
perf_duration 0
"
+
+TYPE_avx2_mismatch="
+display avx2 false match
+type_spec inet_proto . ipv6_addr
+chain_spec meta l4proto . ip6 daddr
+dst proto addr6
+src
+start 1
+count 1
+src_delta 1
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+
+TYPE_doublecreate="
+display cannot create same element twice
+type_spec ipv4_addr . ipv4_addr
+chain_spec ip saddr . ip daddr
+dst addr4
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -398,6 +431,7 @@ table inet filter {
set test {
type ${type_spec}
+ counter
flags interval,timeout
}
@@ -1137,9 +1171,18 @@ del() {
fi
}
-# Return packet count from 'test' counter in 'inet filter' table
+# Return packet count for elem $1 from 'test' counter in 'inet filter' table
count_packets() {
found=0
+ for token in $(nft reset element inet filter test "${1}" ); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets_nomatch() {
+ found=0
for token in $(nft list counter inet filter test); do
[ ${found} -eq 1 ] && echo "${token}" && return
[ "${token}" = "packets" ] && found=1
@@ -1185,6 +1228,10 @@ perf() {
# Set MAC addresses, send single packet, check that it matches, reset counter
send_match() {
+ local elem="$1"
+
+ shift
+
ip link set veth_a address "$(format_mac "${1}")"
ip -n B link set veth_b address "$(format_mac "${2}")"
@@ -1195,7 +1242,7 @@ send_match() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "1" ]; then
+ if [ "$(count_packets "$elem")" != "1" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1221,7 +1268,7 @@ send_nomatch() {
eval src_"$f"=\$\(format_\$f "${2}"\)
done
eval send_\$proto
- if [ "$(count_packets)" != "0" ]; then
+ if [ "$(count_packets_nomatch)" != "0" ]; then
err "${proto} packet to:"
err " $(for f in ${dst}; do
eval format_\$f "${1}"; printf ' '; done)"
@@ -1234,15 +1281,54 @@ send_nomatch() {
fi
}
+maybe_send_nomatch() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets_nomatch)" != "0" ]; then
+ err "Packet to $dst_addr4:$dst_port did match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+maybe_send_match() {
+ local elem="$1"
+ local what="$4"
+
+ [ $((RANDOM%20)) -gt 0 ] && return
+
+ dst_addr4="$2"
+ dst_port="$3"
+ send_udp
+
+ if [ "$(count_packets "{ $elem }")" != "1" ]; then
+ err "Packet to $dst_addr4:$dst_port did not match $what"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+ nft reset element inet filter test "{ $elem }" >/dev/null
+}
+
# Correctness test template:
# - add ranged element, check that packets match it
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
-test_correctness() {
- setup veth send_"${proto}" set || return ${ksft_skip}
-
+test_correctness_main() {
range_size=1
+
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
for i in $(seq "${start}" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
# Avoid negative or zero-sized port ranges
@@ -1253,15 +1339,16 @@ test_correctness() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
- del "$(format)" || return 1
+ del "$elem" || return 1
for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
@@ -1274,6 +1361,163 @@ test_correctness() {
done
}
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ test_correctness_main
+}
+
+# Repeat the correctness tests, but add extra non-matching entries.
+# This exercises the more compact '4 bit group' representation that
+# gets picked when the default 8-bit representation exceed
+# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory.
+# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust().
+#
+# The format() helper is way too slow when generating lots of
+# entries so its not used here.
+test_correctness_large() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ # number of dummy (filler) entries to add.
+ local dcount=16385
+
+ (
+ echo -n "add element inet filter test { "
+
+ case "$type_spec" in
+ "ether_addr . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_mac $((1000000 + i))
+ printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "inet_proto . ipv6_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . " $((RANDOM%256))
+ format_addr6 $((1000000 + i))
+ done
+ ;;
+ "inet_service . inet_proto")
+ # smaller key sizes, need more entries to hit the
+ # 4-bit threshold.
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ local proto=$((RANDOM%256))
+
+ # Test uses UDP to match, as it also fails when matching
+ # an entry that doesn't exist, so skip 'udp' entries
+ # to not trigger a wrong failure.
+ [ $proto -eq 17 ] && proto=18
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . %i " $(((i%65534) + 1)) $((proto))
+ done
+ ;;
+ "inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv4_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ done
+ ;;
+ "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1))
+ format_mac $((1000000 + i))
+ printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . inet_proto . ipv4_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv4_addr . inet_service . ipv4_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256))
+ done
+ ;;
+ "ipv6_addr . ether_addr")
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . "
+ format_mac $((1000000 + i))
+ done
+ ;;
+ "ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ done
+ ;;
+ "ipv6_addr . inet_service . ether_addr . inet_proto")
+ dcount=65536
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_mac $((i + 1000000))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ "ipv6_addr . inet_service . ipv6_addr . inet_service")
+ dcount=32768
+ for i in $(seq 1 $dcount); do
+ [ $i -gt 1 ] && echo ", "
+ format_addr6 $((i + 1000000))
+ echo -n " . $(((RANDOM%65534) + 1)) . "
+ format_addr6 $((i + 2123456))
+ echo -n " . $((RANDOM%256))"
+ done
+ ;;
+ *)
+ "Unhandled $type_spec"
+ return 1
+ esac
+ echo -n "}"
+
+ ) | nft -f - || return 1
+
+ test_correctness_main
+}
+
# Concurrency test template:
# - add all the elements
# - start a thread for each physical thread that:
@@ -1396,14 +1640,17 @@ test_timeout() {
range_size=1
for i in $(seq "$start" $((start + count))); do
+ local elem=""
+
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- add "$(format)" || return 1
+ elem="$(format)"
+ add "$elem" || return 1
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$elem" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1561,7 +1808,7 @@ test_bug_reload() {
srcend=$((end + src_delta))
for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
- send_match "${j}" $((j + src_delta)) || return 1
+ send_match "$(format)" "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
@@ -1580,22 +1827,34 @@ test_bug_net_port_proto_match() {
range_size=1
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
+
+ # too slow, do not test all addresses
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1
nft "add element inet filter test { $elem }" || return 1
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1
+
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
local got=$(nft "get element inet filter test { $elem }")
err "post-add: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1
done
done
# recheck after set was filled
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ local dport=$j
+
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
nft "get element inet filter test { $elem }" | grep -q "$elem"
if [ $? -ne 0 ];then
@@ -1603,6 +1862,9 @@ test_bug_net_port_proto_match() {
err "post-fill: should have returned $elem but got $got"
return 1
fi
+
+ maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1
done
done
@@ -1610,9 +1872,10 @@ test_bug_net_port_proto_match() {
for i in $(seq 1 10); do
for j in $(seq 1 20) ; do
local rnd=$((RANDOM%10))
+ local dport=$j
local got=""
- elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))")
if [ $rnd -gt 0 ];then
continue
fi
@@ -1623,12 +1886,74 @@ test_bug_net_port_proto_match() {
err "post-delete: query for $elem returned $got instead of error."
return 1
fi
+
+ maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1
done
done
nft flush ruleset
}
+test_bug_avx2_mismatch()
+{
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001"
+ local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001"
+
+ nft "add element inet filter test { icmpv6 . $a1 }"
+
+ dst_addr6="$a2"
+ send_icmp6
+
+ if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then
+ err "False match for $a2"
+ return 1
+ fi
+}
+
+test_bug_doublecreate()
+{
+ local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4"
+ local ret=1
+ local i
+
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ add "{ $elements }" || return 1
+ # expected to work: 'add' on existing should be no-op.
+ add "{ $elements }" || return 1
+
+ # 'create' should return an error.
+ if nft create element inet filter test "{ $elements }" 2>/dev/null; then
+ err "Could create an existing element"
+ return 1
+ fi
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+create element inet filter test { $elements }
+EOF
+ ret=$?
+ if [ $ret -eq 0 ]; then
+ err "Could create element twice in one transaction"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+
+nft -f - <<EOF 2>/dev/null
+flush set inet filter test
+create element inet filter test { $elements }
+EOF
+ ret=$?
+ if [ $ret -ne 0 ]; then
+ err "Could not flush and re-create element in one transaction"
+ return 1
+ fi
+
+ return 0
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..04544905c216 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -3,6 +3,10 @@
# This tests the fib expression.
#
# Kselftest framework requirement - SKIP code is 4.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
source lib.sh
@@ -45,6 +49,19 @@ table inet filter {
EOF
}
+load_input_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
load_pbr_ruleset() {
local netns=$1
@@ -59,6 +76,89 @@ table inet filter {
EOF
}
+load_type_ruleset() {
+ local netns=$1
+
+ for family in ip ip6;do
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain type_match_in {
+ fib daddr type local counter comment "daddr configured on other iface"
+ fib daddr . iif type local counter comment "daddr configured on iif"
+ fib daddr type unicast counter comment "daddr not local"
+ fib daddr . iif type unicast counter comment "daddr not configured on iif"
+ }
+
+ chain type_match_out {
+ fib daddr type unicast counter
+ fib daddr . oif type unicast counter
+ fib daddr type local counter
+ fib daddr . oif type local counter
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain input {
+ type filter hook input priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain forward {
+ type filter hook forward priority 0;
+ icmp type echo-request counter jump type_match_in
+ icmpv6 type echo-request counter jump type_match_in
+ }
+
+ chain output {
+ type filter hook output priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+
+ chain postrouting {
+ type filter hook postrouting priority 0;
+ icmp type echo-request counter jump type_match_out
+ icmpv6 type echo-request counter jump type_match_out
+ }
+}
+EOF
+done
+}
+
+reload_type_ruleset() {
+ ip netns exec "$1" nft flush table ip filter
+ ip netns exec "$1" nft flush table ip6 filter
+ load_type_ruleset "$1"
+}
+
+check_fib_type_counter_family() {
+ local family="$1"
+ local want="$2"
+ local ns="$3"
+ local chain="$4"
+ local what="$5"
+ local errmsg="$6"
+
+ if ! ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then
+ echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2
+ ip netns exec "$ns" nft list chain "$family" filter "$chain"
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_type_counter() {
+ check_fib_type_counter_family "ip" "$@" || return 1
+ check_fib_type_counter_family "ip6" "$@" || return 1
+}
+
load_ruleset_count() {
local netns=$1
@@ -77,6 +177,7 @@ check_drops() {
if dmesg | grep -q ' nft_rpfilter: ';then
dmesg | grep ' nft_rpfilter: '
echo "FAIL: rpfilter did drop packets"
+ ret=1
return 1
fi
@@ -151,19 +252,509 @@ test_ping() {
return 0
}
+test_ping_unreachable() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr4" 1>&2
+ return 1
+ fi
+
+ if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then
+ echo "FAIL: ${ns1} could reach $daddr6" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+test_fib_type() {
+ local notice="$1"
+ local errmsg="addr-on-if"
+ local lret=0
+
+ if ! load_type_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib type ruleset"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ # makes router receive packet for addresses configured on incoming
+ # interface.
+ test_ping 10.0.1.1 dead:1::1 || return 1
+
+ # expectation: triggers all 'local' in prerouting/input.
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ # makes router receive packet for address configured on a different (but local)
+ # interface.
+ test_ping 10.0.2.1 dead:2::1 || return 1
+
+ # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'.
+ errmsg="addr-on-host"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ reload_type_ruleset "$nsrouter"
+ test_ping 10.0.2.99 dead:2::99 || return 1
+ errmsg="addr-on-otherhost"
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1
+ check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: fib expression address types match ($notice)"
+ else
+ echo "FAIL: fib expression address types match ($notice)"
+ ret=1
+ fi
+}
+
+test_fib_vrf_dev_add_dummy()
+{
+ if ! ip -net "$nsrouter" link add dummy0 type dummy ;then
+ echo "SKIP: VRF tests: dummy device type not supported"
+ return 1
+ fi
+
+ if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then
+ echo "SKIP: VRF tests: vrf device type not supported"
+ return 1
+ fi
+
+ ip -net "$nsrouter" link set dummy0 master tvrf
+ ip -net "$nsrouter" link set dummy0 up
+ ip -net "$nsrouter" link set tvrf up
+}
+
+load_ruleset_vrf()
+{
+# Due to the many different possible combinations using named counters
+# or one-rule-per-expected-result is complex.
+#
+# Instead, add dynamic sets for the fib modes
+# (fib address type, fib output interface lookup .. ),
+# and then add the obtained fib results to them.
+#
+# The test is successful if the sets contain the expected results
+# and no unexpected extra entries existed.
+ip netns exec "$nsrouter" nft -f - <<EOF
+flush ruleset
+table inet t {
+ set fibif4 {
+ typeof meta iif . ip daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif4iif {
+ typeof meta iif . ip daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6 {
+ typeof meta iif . ip6 daddr . fib daddr oif
+ flags dynamic
+ counter
+ }
+
+ set fibif6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif oif
+ flags dynamic
+ counter
+ }
+
+ set fibtype4 {
+ typeof meta iif . ip daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype4iif {
+ typeof meta iif . ip daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6 {
+ typeof meta iif . ip6 daddr . fib daddr type
+ flags dynamic
+ counter
+ }
+
+ set fibtype6iif {
+ typeof meta iif . ip6 daddr . fib daddr . iif type
+ flags dynamic
+ counter
+ }
+
+ chain fib_test {
+ meta nfproto ipv4 jump {
+ add @fibif4 { meta iif . ip daddr . fib daddr oif }
+ add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif }
+ add @fibtype4 { meta iif . ip daddr . fib daddr type }
+ add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type }
+
+ add @fibif4 { meta iif . ip saddr . fib saddr oif }
+ add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif }
+ }
+
+ meta nfproto ipv6 jump {
+ add @fibif6 { meta iif . ip6 daddr . fib daddr oif }
+ add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif }
+ add @fibtype6 { meta iif . ip6 daddr . fib daddr type }
+ add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type }
+
+ add @fibif6 { meta iif . ip6 saddr . fib saddr oif }
+ add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif }
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority 0;
+ icmp type echo-request counter jump fib_test
+
+ # neighbour discovery to be ignored.
+ icmpv6 type echo-request counter jump fib_test
+ }
+}
+EOF
+
+if [ $? -ne 0 ] ;then
+ echo "SKIP: Could not load ruleset for fib vrf test"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return 1
+fi
+}
+
+check_type()
+{
+ local setname="$1"
+ local iifname="$2"
+ local addr="$3"
+ local type="$4"
+ local count="$5"
+ local lret=0
+
+ [ -z "$count" ] && count=1
+
+ if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then
+ echo "FAIL: did not find $iifname . $addr . $type in $setname with $count packets"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ # do not fail right away, delete entry if it exists so later test that
+ # checks for unwanted keys don't get confused by this *expected* key.
+ lret=1
+ fi
+
+ # delete the entry, this allows to check if anything unexpected appeared
+ # at the end of the test run: all dynamic sets should be empty by then.
+ if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then
+ echo "FAIL: can't delete $iifname . $addr . $type in $setname"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ return 1
+ fi
+
+ return $lret
+}
+
+check_local()
+{
+ check_type $@ "local" 1
+}
+
+check_unicast()
+{
+ check_type $@ "unicast" 1
+}
+
+check_rpf()
+{
+ check_type $@
+}
+
+check_fib_vrf_sets_empty()
+{
+ local setname=""
+ local lret=0
+
+ # A non-empty set means that we have seen unexpected packets OR
+ # that a fib lookup provided unexpected results.
+ for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \
+ "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do
+ if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then
+ echo "FAIL: $setname not empty"
+ ip netns exec "$nsrouter" nft list set inet t "$setname"
+ ret=1
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+check_fib_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ # the incoming interface is always veth0. As its not linked to a VRF,
+ # the 'tvrf' device should NOT show up anywhere.
+ local ifname="veth0"
+ local lret=0
+
+ # local_veth0, local_veth1
+ for addr in "10.0.1.1" "10.0.2.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "0" || lret=1
+ done
+ for addr in "dead:1::1" "dead:2::1";do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "0" || lret=1
+ done
+
+ # when restricted to the incoming interface, 10.0.1.1 should
+ # be 'local', but 10.0.2.1 unicast.
+ check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1
+
+ # same for the ipv6 addresses.
+ check_local fibtype6iif "$ifname" "dead:1::1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1
+
+ # None of these addresses should find a valid route when restricting
+ # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are
+ # reachable via 'lo'.
+ for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do
+ check_type fibif4iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ # expect default route (veth1), dummy0 is part of VRF but iif isn't.
+ for addr in "10.9.9.1" "10.9.9.2";do
+ check_unicast fibtype4 "$ifname" "$addr" || lret=1
+ check_unicast fibtype4iif "$ifname" "$addr" || lret=1
+ check_type fibif4 "$ifname" "$addr" "veth1" || lret=1
+ done
+ for addr in "dead:9::1" "dead:9::2";do
+ check_unicast fibtype6 "$ifname" "$addr" || lret=1
+ check_unicast fibtype6iif "$ifname" "$addr" || lret=1
+ check_type fibif6 "$ifname" "$addr" "veth1" || lret=1
+ done
+
+ # same for the IPv6 equivalent addresses.
+ for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do
+ check_type fibif6iif "$ifname" "$addr" "0" || lret=1
+ done
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+check_fib_veth_vrf_type()
+{
+ local msg="$1"
+
+ local addr
+ local ifname
+ local setname
+ local lret=0
+
+ # as veth0 is now part of tvrf interface, packets will be seen
+ # twice, once with iif veth0, then with iif tvrf.
+
+ for ifname in "veth0" "tvrf"; do
+ for addr in "10.0.1.1" "10.9.9.1"; do
+ check_local fibtype4 "$ifname" "$addr" || lret=1
+ # addr local, but nft_fib doesn't return routes with RTN_LOCAL.
+ check_type fibif4 "$ifname" "$addr" 0 || lret=1
+ check_type fibif4iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for addr in "dead:1::1" "dead:9::1"; do
+ check_local fibtype6 "$ifname" "$addr" || lret=1
+ # same, address is local but no route is returned for lo.
+ check_type fibif6 "$ifname" "$addr" 0 || lret=1
+ check_type fibif6iif "$ifname" "$addr" 0 || lret=1
+ done
+
+ for t in fibtype4 fibtype4iif; do
+ check_unicast "$t" "$ifname" 10.9.9.2 || lret=1
+ done
+ for t in fibtype6 fibtype6iif; do
+ check_unicast "$t" "$ifname" dead:9::2 || lret=1
+ done
+
+ check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1
+
+ check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1
+ check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1
+
+ check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1
+ check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1
+
+ check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1
+ check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1
+ check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1
+ check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1
+
+ # restricted to iif -- MUST NOT provide result, its != $ifname.
+ check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1
+ check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1
+
+ check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1
+ check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1
+ check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1
+ check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1
+ done
+
+ check_local fibtype4iif "veth0" "10.0.1.1" || lret=1
+ check_local fibtype6iif "veth0" "dead:1::1" || lret=1
+
+ check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1
+ check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1
+
+ # 10.9.9.2 should not provide a result for iif veth, but
+ # should when iif is tvrf.
+ # This is because its reachable via dummy0 which is part of
+ # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0).
+ check_type fibif4iif "veth0" "10.9.9.2" 0 || lret=1
+ check_type fibif6iif "veth0" "dead:9::2" 0 || lret=1
+
+ check_type fibif4iif "tvrf" "10.9.9.2" "tvrf" || lret=1
+ check_type fibif6iif "tvrf" "dead:9::2" "tvrf" || lret=1
+
+ check_fib_vrf_sets_empty || lret=1
+
+ if [ $lret -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "FAIL: $msg"
+ ret=1
+ fi
+}
+
+# Extends nsrouter config by adding dummy0+vrf.
+#
+# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99
+# dead:1::99 dead:1::1 dead:2::1 dead:2::99
+# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
+# [dummy0]
+# 10.9.9.1
+# dead:9::1
+# [tvrf]
+test_fib_vrf()
+{
+ local cntname=""
+
+ if ! test_fib_vrf_dev_add_dummy; then
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return
+ fi
+
+ ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
+ ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad
+
+ ip -net "$nsrouter" route add default via 10.0.2.99
+ ip -net "$nsrouter" route add default via dead:2::99
+
+ load_ruleset_vrf || return
+
+ # no echo reply for these addresses: The dummy interface is part of tvrf,
+ # but veth0 (incoming interface) isn't linked to it.
+ test_ping_unreachable "10.9.9.1" "dead:9::1" &
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ # expect replies from these.
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.0.2.1" "dead:2::1"
+ test_ping "10.0.2.99" "dead:2::99"
+
+ wait
+
+ check_fib_vrf_type "fib expression address types match (iif not in vrf)"
+
+ # second round: this time, make veth0 (rx interface) part of the vrf.
+ # 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
+ # becomes unreachable.
+ ip -net "$nsrouter" link set veth0 master tvrf
+ ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ # this reload should not be needed, but in case
+ # there is some error (missing or unexpected entry) this will prevent them
+ # from leaking into round 2.
+ load_ruleset_vrf || return
+
+ test_ping "10.0.1.1" "dead:1::1"
+ test_ping "10.9.9.1" "dead:9::1"
+
+ # ns2 should no longer be reachable (veth1 not in vrf)
+ test_ping_unreachable "10.0.2.99" "dead:2::99" &
+
+ # vrf via dummy0, but host doesn't exist
+ test_ping_unreachable "10.9.9.2" "dead:9::2" &
+
+ wait
+
+ check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
+}
+
ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
+check_drops
test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not cause unwanted packet drops"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1
+check_drops
+
+test_ping 10.0.1.99 dead:1::99
+check_drops
+
+[ $ret -eq 0 ] && echo "PASS: fib expression did not discard loopback packets"
+
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1 || exit 1
check_drops || exit 1
-echo "PASS: fib expression did not cause unwanted packet drops"
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
ip netns exec "$nsrouter" nft flush table inet filter
@@ -213,7 +804,7 @@ ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
# ... pbr ruleset for the router, check iif+oif.
if ! load_pbr_ruleset "$nsrouter";then
echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
fi
ip -net "$nsrouter" rule add from all table 128
@@ -224,11 +815,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
# drop main ipv4 table
ip -net "$nsrouter" -4 rule delete table main
-if ! test_ping 10.0.2.99 dead:2::99;then
- ip -net "$nsrouter" nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
+if test_ping 10.0.2.99 dead:2::99;then
+ echo "PASS: fib expression forward check with policy based routing"
+else
+ echo "FAIL: fib expression forward check with policy based routing"
+ ret=1
fi
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
+test_fib_type "policy routing"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+# Un-do policy routing changes
+ip -net "$nsrouter" rule del from all table 128
+ip -net "$nsrouter" rule del from all iif veth0 table 129
+
+ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1
+
+ip -net "$ns1" -4 route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" -4 route add default via 10.0.1.1
+ip -net "$ns1" -6 route add default via dead:1::1
+
+ip -net "$nsrouter" -4 rule add from all table main priority 32766
+
+test_fib_type "default table"
+ip netns exec "$nsrouter" nft delete table ip filter
+ip netns exec "$nsrouter" nft delete table ip6 filter
+
+test_fib_vrf
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a4ee5496f2a1..a68bc882fa4e 100755
--- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -20,6 +20,7 @@ ret=0
SOCAT_TIMEOUT=60
nsin=""
+nsin_small=""
ns1out=""
ns2out=""
@@ -36,7 +37,7 @@ cleanup() {
cleanup_all_ns
- rm -f "$nsin" "$ns1out" "$ns2out"
+ rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out"
[ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
@@ -72,6 +73,7 @@ lmtu=1500
rmtu=2000
filesize=$((2 * 1024 * 1024))
+filesize_small=$((filesize / 16))
usage(){
echo "nft_flowtable.sh [OPTIONS]"
@@ -89,7 +91,10 @@ do
o) omtu=$OPTARG;;
l) lmtu=$OPTARG;;
r) rmtu=$OPTARG;;
- s) filesize=$OPTARG;;
+ s)
+ filesize=$OPTARG
+ filesize_small=$((OPTARG / 16))
+ ;;
*) usage;;
esac
done
@@ -122,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
+ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
for i in 0 1; do
ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
@@ -148,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1
ip -net "$ns2" route add default via dead:2::1
ip -net "$nsr1" route add default via 192.168.10.2
+ip -6 -net "$nsr1" route add default via fee1:2::2
ip -net "$nsr2" route add default via 192.168.10.1
+ip -6 -net "$nsr2" route add default via fee1:2::1
ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
@@ -215,6 +224,7 @@ if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
fi
nsin=$(mktemp)
+nsin_small=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
@@ -265,6 +275,7 @@ check_counters()
check_dscp()
{
local what=$1
+ local pmtud="$2"
local ok=1
local counter
@@ -277,37 +288,39 @@ check_dscp()
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
+ local failmsg="FAIL: pmtu $pmtu: $what counters do not match, expected"
+
case "$what" in
"dscp_none")
if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
- echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ echo "$failmsg dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
*)
- echo "FAIL: Unknown DSCP check" 1>&2
+ echo "$failmsg: Unknown DSCP check" 1>&2
ret=1
ok=0
esac
@@ -319,9 +332,9 @@ check_dscp()
check_transfer()
{
- in=$1
- out=$2
- what=$3
+ local in=$1
+ local out=$2
+ local what=$3
if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
@@ -342,25 +355,42 @@ test_tcp_forwarding_ip()
{
local nsa=$1
local nsb=$2
- local dstip=$3
- local dstport=$4
+ local pmtu=$3
+ local proto=$4
+ local dstip=$5
+ local dstport=$6
local lret=0
+ local socatc
+ local socatl
+ local infile="$nsin"
+
+ if [ $pmtu -eq 0 ]; then
+ infile="$nsin_small"
+ fi
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
+ TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
lpid=$!
busywait 1000 listener_ready
- timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
+ TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
+ socatc=$?
wait $lpid
+ socatl=$?
+
+ if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
+ rc=1
+ fi
- if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
lret=1
ret=1
fi
- if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
lret=1
ret=1
fi
@@ -370,14 +400,22 @@ test_tcp_forwarding_ip()
test_tcp_forwarding()
{
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ local pmtu="$3"
+ local proto="$4"
+ local dstip="$5"
+ local dstport="$6"
+
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
return $?
}
test_tcp_forwarding_set_dscp()
{
- check_dscp "dscp_none"
+ local pmtu="$3"
+ local proto="$4"
+ local dstip="$5"
+ local dstport="$6"
ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
@@ -388,8 +426,8 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_ingress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+ check_dscp "dscp_ingress" "$pmtu"
ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
@@ -405,10 +443,10 @@ table netdev dscpmangle {
}
EOF
if [ $? -eq 0 ]; then
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_egress"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+ check_dscp "dscp_egress" "$pmtu"
- ip netns exec "$nsr1" nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
@@ -416,48 +454,53 @@ fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- check_dscp "dscp_fwd"
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" "$proto" "$dstip" "$dstport"
+ check_dscp "dscp_fwd" "$pmtu"
}
test_tcp_forwarding_nat()
{
+ local nsa="$1"
+ local nsb="$2"
+ local pmtu="$3"
+ local what="$4"
local lret
- local pmtu
- test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
- lret=$?
+ [ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"
- pmtu=$3
- what=$4
+ test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
+ lret=$?
if [ "$lret" -eq 0 ] ; then
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with masquerade $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
fi
- test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ test_tcp_forwarding_ip "$1" "$2" "$pmtu" 4 10.6.6.6 1666
lret=$?
if [ "$pmtu" -eq 1 ] ;then
- check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ check_counters "flow offload for ns1/ns2 with dnat $what"
elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
+ else
+ echo "FAIL: flow offload for ns1/ns2 with dnat $what"
fi
return $lret
}
make_file "$nsin" "$filesize"
+make_file "$nsin_small" "$filesize_small"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -465,6 +508,14 @@ else
ret=1
fi
+if test_tcp_forwarding "$ns1" "$ns2" 0 6 "[dead:2::99]" 12345; then
+ echo "PASS: IPv6 flow offloaded for ns1/ns2"
+else
+ echo "FAIL: IPv6 flow offload for ns1/ns2:" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
@@ -489,8 +540,9 @@ table ip nat {
}
EOF
-if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
- echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+check_dscp "dscp_none" "0"
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 4 10.0.2.99 12345; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and no pmtu discovery" 1>&2
exit 0
fi
@@ -513,12 +565,87 @@ ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+ip netns exec "$ns2" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update and pmtu discovery" 1>&2
+ exit 0
+fi
+
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
ip netns exec "$nsr1" nft list ruleset
fi
+# IPIP tunnel test:
+# Add IPIP tunnel interfaces and check flowtable acceleration.
+test_ipip() {
+if ! ip -net "$nsr1" link add name tun0 type ipip \
+ local 192.168.10.1 remote 192.168.10.2 >/dev/null;then
+ echo "SKIP: could not add ipip tunnel"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" link set tun0 up
+ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
+ip -net "$nsr2" link set tun0 up
+ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null
+
+ip -net "$nsr1" route change default via 192.168.100.2
+ip -net "$nsr2" route change default via 192.168.100.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
+ip netns exec "$nsr1" nft -a insert rule inet filter forward \
+ 'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
+ echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Create vlan tagged devices for IPIP traffic.
+ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
+ip -net "$nsr1" link set veth1.10 up
+ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
+ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
+ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
+ip -net "$nsr1" link set tun1 up
+ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
+ip -net "$nsr1" route change default via 192.168.200.2
+ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'
+
+ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr2" link set veth0.10 up
+ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
+ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
+ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
+ip -net "$nsr2" link set tun1 up
+ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
+ip -net "$nsr2" route change default via 192.168.200.1
+ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
+ echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Restore the previous configuration
+ip -net "$nsr1" route change default via 192.168.10.2
+ip -net "$nsr2" route change default via 192.168.10.1
+ip -net "$ns2" route del default via 10.0.2.1
+}
+
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
test_bridge() {
@@ -604,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
ip -net "$nsr1" link set up dev veth0
}
+test_ipip
+
test_bridge
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
@@ -644,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
ip -net "$ns2" route add default via 10.0.2.1
ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding "$ns1" "$ns2"; then
+if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
@@ -652,6 +781,14 @@ else
ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
fi
+if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then
+ check_counters "IPv6 ipsec tunnel mode for ns1/ns2"
+else
+ echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2"
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
if [ "$1" = "" ]; then
low=1280
mtu=$((65536 - low))
@@ -668,7 +805,7 @@ if [ "$1" = "" ]; then
fi
echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize"
- $0 -o "$o" -l "$l" -r "$r" -s "$filesize"
+ $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1
fi
exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
new file mode 100755
index 000000000000..c0fffaa6dbd9
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh
@@ -0,0 +1,157 @@
+#!/bin/bash -e
+#
+# SPDX-License-Identifier: GPL-2.0
+#
+# Torture nftables' netdevice notifier callbacks and related code by frequent
+# renaming of interfaces which netdev-family chains and flowtables hook into.
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3 tool"
+
+read kernel_tainted < /proc/sys/kernel/tainted
+
+# how many seconds to torture the kernel?
+# default to 80% of max run time but don't exceed 48s
+TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10))
+[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48
+
+trap "cleanup_all_ns" EXIT
+
+setup_ns nsc nsr nss
+
+ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr
+ip -net $nsc addr add 10.0.0.1/24 dev cr0
+ip -net $nsc link set cr0 up
+ip -net $nsc route add default via 10.0.0.2
+
+ip -net $nss link add sr0 type veth peer name rs0 netns $nsr
+ip -net $nss addr add 10.1.0.1/24 dev sr0
+ip -net $nss link set sr0 up
+ip -net $nss route add default via 10.1.0.2
+
+ip -net $nsr addr add 10.0.0.2/24 dev rc0
+ip -net $nsr link set rc0 up
+ip -net $nsr addr add 10.1.0.2/24 dev rs0
+ip -net $nsr link set rs0 up
+ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1
+
+{
+ echo "table netdev t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ chain chain_rc$i {
+ type filter hook ingress device rc$i priority 0
+ counter
+ }
+ chain chain_rs$i {
+ type filter hook ingress device rs$i priority 0
+ counter
+ }
+ EOF
+ done
+ echo "}"
+ echo "table ip t {"
+ for ((i = 0; i < 10; i++)); do
+ cat <<-EOF
+ flowtable ft_${i} {
+ hook ingress priority 0
+ devices = { rc$i, rs$i }
+ }
+ EOF
+ done
+ echo "chain c {"
+ echo "type filter hook forward priority 0"
+ for ((i = 0; i < 10; i++)); do
+ echo -n "iifname rc$i oifname rs$i "
+ echo "ip protocol tcp counter flow add @ft_${i}"
+ done
+ echo "counter"
+ echo "}"
+ echo "}"
+} | ip netns exec $nsr nft -f - || {
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+}
+
+for ((o=0, n=1; ; o=n, n++, n %= 10)); do
+ ip -net $nsr link set rc$o name rc$n
+ ip -net $nsr link set rs$o name rs$n
+done &
+rename_loop_pid=$!
+
+while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done &
+nft_list_pid=$!
+
+ip netns exec $nsr nft monitor >/dev/null &
+nft_monitor_pid=$!
+
+ip netns exec $nss iperf3 --server --daemon -1
+summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p'
+rate=$(ip netns exec $nsc iperf3 \
+ --format k -c 10.1.0.1 --time $TEST_RUNTIME \
+ --length 56 --parallel 10 -i 0 | sed -n "$summary_expr")
+
+kill $nft_list_pid
+kill $nft_monitor_pid
+kill $rename_loop_pid
+wait
+
+wildcard_prep() {
+ ip netns exec $nsr nft -f - <<EOF
+table ip t {
+ flowtable ft_wild {
+ hook ingress priority 0
+ devices = { wild* }
+ }
+}
+EOF
+}
+
+if ! wildcard_prep; then
+ echo "SKIP wildcard tests: not supported by host's nft?"
+else
+ for ((i = 0; i < 100; i++)); do
+ ip -net $nsr link add wild$i type dummy &
+ done
+ wait
+ for ((i = 80; i < 100; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ for ((i = 0; i < 80; i++)); do
+ ip -net $nsr link del wild$i &
+ done
+ wait
+ for ((i = 0; i < 100; i += 10)); do
+ (
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link add wild$((i + j)) type dummy
+ done
+ for ((j = 0; j < 10; j++)); do
+ ip -net $nsr link del wild$((i + j))
+ done
+ ) &
+ done
+ wait
+fi
+
+
+[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && {
+ echo "FAIL: Kernel is tainted!"
+ exit $ksft_fail
+}
+
+[[ $rate -gt 0 ]] || {
+ echo "FAIL: Zero throughput in iperf3"
+ exit $ksft_fail
+}
+
+[[ -f /sys/kernel/debug/kmemleak && \
+ -n $(</sys/kernel/debug/kmemleak) ]] && {
+ echo "FAIL: non-empty kmemleak report"
+ exit $ksft_fail
+}
+
+exit $ksft_pass
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index 9e39de26455f..b3ec2d0a3f56 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -569,7 +569,7 @@ test_redirect6()
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
- echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
lret=1
fi
@@ -859,13 +859,31 @@ EOF
# from router:service bypass connection tracking.
test_port_shadow_notrack "$family"
- # test nat based mitigation: fowarded packets coming from service port
+ # test nat based mitigation: forwarded packets coming from service port
# are masqueraded with random highport.
test_port_shadow_pat "$family"
ip netns exec "$ns0" nft delete table $family nat
}
+file_cmp()
+{
+ local infile="$1"
+ local outfile="$2"
+
+ if ! cmp "$infile" "$outfile";then
+ echo -n "Infile "
+ ls -l "$infile"
+ echo -n "Outfile "
+ ls -l "$outfile"
+ echo "ERROR: in and output file mismatch when checking $msg" 1>&1
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
test_stateless_nat_ip()
{
local lret=0
@@ -966,11 +984,7 @@ EOF
wait
- if ! cmp "$INFILE" "$OUTFILE";then
- ls -l "$INFILE" "$OUTFILE"
- echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
- lret=1
- fi
+ file_cmp "$INFILE" "$OUTFILE" "udp with stateless nat" || lret=1
:> "$OUTFILE"
@@ -991,6 +1005,62 @@ EOF
return $lret
}
+test_dnat_clash()
+{
+ local lret=0
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run dnat clash test without socat tool"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+flush ruleset
+table ip dnat-test {
+ chain prerouting {
+ type nat hook prerouting priority dstnat; policy accept;
+ ip daddr 10.0.2.1 udp dport 1234 counter dnat to 10.0.1.1:1234
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add dnat rules"
+ [ $ret -eq 0 ] && ret=$ksft_skip
+ return $ksft_skip
+ fi
+
+ local udpdaddr="10.0.2.1"
+ for i in 1 2;do
+ echo "PING $udpdaddr" > "$INFILE"
+ echo "PONG 10.0.1.1 step $i" | ip netns exec "$ns0" timeout 3 socat STDIO UDP4-LISTEN:1234,bind=10.0.1.1 > "$OUTFILE" 2>/dev/null &
+ local lpid=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u"
+
+ result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE")
+ udpdaddr="10.0.1.1"
+
+ if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2
+ lret=1
+ ret=1
+ fi
+
+ wait
+
+ file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1
+
+ :> "$OUTFILE"
+ done
+
+ test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2"
+
+ ip netns exec "$ns0" nft flush ruleset
+
+ return $lret
+}
+
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in "$ns0" "$ns1" "$ns2" ;do
ip netns exec "$i" nft -f /dev/stdin <<EOF
@@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet
test_port_shadowing
test_stateless_nat_ip
+test_dnat_clash
if [ $ret -ne 0 ];then
echo -n "FAIL: "
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index 3b81d88bdde3..9f200f80253a 100755
--- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
echo netns exec "$gw" ip link set "veth$i" up
echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
@@ -178,7 +177,6 @@ fi
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
# useful for debugging: allows to use 'ping' from clients to gateway.
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
index 785e3875a6da..6136ceec45e0 100755
--- a/tools/testing/selftests/net/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -10,6 +10,8 @@ source lib.sh
ret=0
timeout=5
+SCTP_TEST_TIMEOUT=60
+
cleanup()
{
ip netns pids "$ns1" | xargs kill 2>/dev/null
@@ -40,7 +42,7 @@ TMPFILE3=$(mktemp)
TMPINPUT=$(mktemp)
COUNT=200
-[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=25
+[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8))
dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT"
if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
@@ -275,9 +277,11 @@ test_tcp_forward()
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2
+ local tthen=$(date +%s)
+
ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen"
kill "$nfqpid"
}
@@ -288,13 +292,14 @@ test_tcp_localhost()
ip netns exec "$nsrouter" ./nf_queue -q 3 &
local nfqpid=$!
+ local tthen=$(date +%s)
busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
- wait "$rpid" && echo "PASS: tcp via loopback"
+ wait_and_check_retval "$rpid" "tcp via loopback" "$tthen"
kill "$nfqpid"
}
@@ -417,6 +422,23 @@ check_output_files()
fi
}
+wait_and_check_retval()
+{
+ local rpid="$1"
+ local msg="$2"
+ local tthen="$3"
+ local tnow=$(date +%s)
+
+ if wait "$rpid";then
+ echo -n "PASS: "
+ else
+ echo -n "FAIL: "
+ ret=1
+ fi
+
+ printf "%s (duration: %ds)\n" "$msg" $((tnow-tthen))
+}
+
test_sctp_forward()
{
ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
@@ -428,13 +450,14 @@ table inet sctpq {
}
}
EOF
- timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
ip netns exec "$nsrouter" ./nf_queue -q 10 -G &
local nfqpid=$!
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
@@ -443,7 +466,7 @@ EOF
exit 1
fi
- wait "$rpid" && echo "PASS: sctp and nfqueue in forward chain"
+ wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen"
kill "$nfqpid"
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward"
@@ -462,13 +485,14 @@ EOF
# reduce test file size, software segmentation causes sk wmem increase.
dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT"
- timeout 60 ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
+ timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" &
local rpid=$!
busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2"
ip netns exec "$ns1" ./nf_queue -q 11 &
local nfqpid=$!
+ local tthen=$(date +%s)
ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
@@ -478,7 +502,7 @@ EOF
fi
# must wait before checking completeness of output file.
- wait "$rpid" && echo "PASS: sctp and nfqueue in output chain with GSO"
+ wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen"
kill "$nfqpid"
check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output"
@@ -593,6 +617,7 @@ EOF
echo "PASS: queue program exiting while packets queued"
else
echo "TAINT: queue program exiting while packets queued"
+ dmesg
ret=1
fi
}
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 4485fd7675ed..24ad41d526d9 100755
--- a/tools/testing/selftests/net/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -1,8 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# return code to signal skipped test
-ksft_skip=4
+source lib.sh
# search for legacy iptables (it uses the xtables extensions
if iptables-legacy --version >/dev/null 2>&1; then
@@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then
exit $ksft_skip
fi
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-trap "ip netns del $ns1; ip netns del $ns2" EXIT
-
-# create two netns, disable rp_filter in ns2 and
-# keep IPv6 address when moving into VRF
-ip netns add "$ns1"
-ip netns add "$ns2"
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
-ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+trap cleanup_all_ns EXIT
+
+# create two netns, keep IPv6 address when moving into VRF
+setup_ns ns1 ns2
ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
# a standard connection between the netns, should not trigger rp filter
@@ -61,9 +53,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0
ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+# avoid neighbor lookups and enable martian IPv6 pings
+ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \
+ sed -n 's, *link/ether \([^ ]*\) .*,\1,p')
+ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \
+ sed -n 's, *link/ether \([^ ]*\) .*,\1,p')
+ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0
+ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0
+ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0
+ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0
+
# firewall matches to test
[ -n "$iptables" ] && {
common='-t raw -A PREROUTING -s 192.168.0.0/16'
+ common+=' -p icmp --icmp-type echo-request'
if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
echo "Cannot add rpfilter rule"
exit $ksft_skip
@@ -72,6 +75,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
}
[ -n "$ip6tables" ] && {
common='-t raw -A PREROUTING -s fec0::/16'
+ common+=' -p icmpv6 --icmpv6-type echo-request'
if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
echo "Cannot add rpfilter rule"
exit $ksft_skip
@@ -82,8 +86,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
table inet t {
chain c {
type filter hook prerouting priority raw;
- ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
- ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ ip saddr 192.168.0.0/16 icmp type echo-request \
+ fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 icmpv6 type echo-request \
+ fib saddr . iif oif exists counter
}
}
EOF
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a85..b282d1785c9b 100644
--- a/tools/testing/selftests/net/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -9,9 +9,10 @@
int main(int argc, char *argv[])
{
struct sockaddr_in saddr = {}, daddr = {};
- int sd, ret, len = sizeof(daddr);
+ socklen_t len = sizeof(daddr);
struct timeval tv = {25, 0};
char buf[] = "hello";
+ int sd, ret;
if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c
new file mode 100644
index 000000000000..79de163d61ab
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/udpclash.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Usage: ./udpclash <IP> <PORT>
+ *
+ * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair.
+ *
+ * This mimics DNS resolver libraries that emit A and AAAA requests
+ * in parallel.
+ *
+ * This exercises conntrack clash resolution logic added and later
+ * refined in
+ *
+ * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race")
+ * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks")
+ * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries")
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <pthread.h>
+
+#define THREAD_COUNT 128
+
+struct thread_args {
+ const struct sockaddr_in *si_remote;
+ int sockfd;
+};
+
+static volatile int wait = 1;
+
+static void *thread_main(void *varg)
+{
+ const struct sockaddr_in *si_remote;
+ const struct thread_args *args = varg;
+ static const char msg[] = "foo";
+
+ si_remote = args->si_remote;
+
+ while (wait == 1)
+ ;
+
+ if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL,
+ (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0)
+ exit(111);
+
+ return varg;
+}
+
+static int run_test(int fd, const struct sockaddr_in *si_remote)
+{
+ struct thread_args thread_args = {
+ .si_remote = si_remote,
+ .sockfd = fd,
+ };
+ pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t));
+ unsigned int repl_count = 0, timeout = 0;
+ int i;
+
+ if (!tid) {
+ perror("calloc");
+ return 1;
+ }
+
+ for (i = 0; i < THREAD_COUNT; i++) {
+ int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args);
+
+ if (err != 0) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ wait = 0;
+
+ for (i = 0; i < THREAD_COUNT; i++)
+ pthread_join(tid[i], NULL);
+
+ while (repl_count < THREAD_COUNT) {
+ struct sockaddr_in si_repl;
+ socklen_t si_repl_len = sizeof(si_repl);
+ char repl[512];
+ ssize_t ret;
+
+ ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL,
+ (struct sockaddr *) &si_repl, &si_repl_len);
+ if (ret < 0) {
+ if (timeout++ > 5000) {
+ fputs("timed out while waiting for reply from thread\n", stderr);
+ break;
+ }
+
+ /* give reply time to pass though the stack */
+ usleep(1000);
+ continue;
+ }
+
+ if (si_repl_len != sizeof(*si_remote)) {
+ fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n",
+ (int)si_repl_len, (int)sizeof(si_repl));
+ } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr ||
+ si_remote->sin_port != si_repl.sin_port) {
+ char a[64], b[64];
+
+ inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a));
+ inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b));
+
+ fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n",
+ a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port));
+ }
+
+ repl_count++;
+ }
+
+ printf("got %d of %d replies\n", repl_count, THREAD_COUNT);
+
+ free(tid);
+
+ return repl_count == THREAD_COUNT ? 0 : 1;
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in si_local = {
+ .sin_family = AF_INET,
+ };
+ struct sockaddr_in si_remote = {
+ .sin_family = AF_INET,
+ };
+ int fd, ret;
+
+ if (argc < 3) {
+ fputs("Usage: send_udp <daddr> <dport>\n", stderr);
+ return 1;
+ }
+
+ si_remote.sin_port = htons(atoi(argv[2]));
+ si_remote.sin_addr.s_addr = inet_addr(argv[1]);
+
+ fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP);
+ if (fd < 0) {
+ perror("socket");
+ return 1;
+ }
+
+ if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) {
+ perror("bind");
+ return 1;
+ }
+
+ ret = run_test(fd, &si_remote);
+
+ close(fd);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c
index 84e29b7dffb6..51129c564d0a 100644
--- a/tools/testing/selftests/net/netlink-dumps.c
+++ b/tools/testing/selftests/net/netlink-dumps.c
@@ -12,10 +12,163 @@
#include <unistd.h>
#include <linux/genetlink.h>
+#include <linux/neighbour.h>
+#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/mqueue.h>
+#include <linux/rtnetlink.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
+
+#include <ynl.h>
+
+struct ext_ack {
+ int err;
+
+ __u32 attr_offs;
+ __u32 miss_type;
+ __u32 miss_nest;
+ const char *str;
+};
+
+enum get_ea_ret {
+ ERROR = -1,
+ NO_CTRL = 0,
+ FOUND_DONE,
+ FOUND_ERR,
+ FOUND_EXTACK,
+};
+
+static enum get_ea_ret
+nl_get_extack(char *buf, size_t n, struct ext_ack *ea)
+{
+ enum get_ea_ret ret = NO_CTRL;
+ const struct nlmsghdr *nlh;
+ const struct nlattr *attr;
+ ssize_t rem;
+
+ for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) {
+ nlh = (struct nlmsghdr *)&buf[n - rem];
+ if (!NLMSG_OK(nlh, rem))
+ return ERROR;
+
+ if (nlh->nlmsg_type == NLMSG_ERROR)
+ ret = FOUND_ERR;
+ else if (nlh->nlmsg_type == NLMSG_DONE)
+ ret = FOUND_DONE;
+ else
+ continue;
+
+ ea->err = -*(int *)NLMSG_DATA(nlh);
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return ret;
+
+ ynl_attr_for_each(attr, nlh, sizeof(int)) {
+ switch (ynl_attr_type(attr)) {
+ case NLMSGERR_ATTR_OFFS:
+ ea->attr_offs = ynl_attr_get_u32(attr);
+ break;
+ case NLMSGERR_ATTR_MISS_TYPE:
+ ea->miss_type = ynl_attr_get_u32(attr);
+ break;
+ case NLMSGERR_ATTR_MISS_NEST:
+ ea->miss_nest = ynl_attr_get_u32(attr);
+ break;
+ case NLMSGERR_ATTR_MSG:
+ ea->str = ynl_attr_get_str(attr);
+ break;
+ }
+ }
+
+ return FOUND_EXTACK;
+ }
+
+ return ret;
+}
+
+static const struct {
+ struct nlmsghdr nlhdr;
+ struct ndmsg ndm;
+ struct nlattr ahdr;
+ __u32 val;
+} dump_neigh_bad = {
+ .nlhdr = {
+ .nlmsg_len = sizeof(dump_neigh_bad),
+ .nlmsg_type = RTM_GETNEIGH,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP,
+ .nlmsg_seq = 1,
+ },
+ .ndm = {
+ .ndm_family = 123,
+ },
+ .ahdr = {
+ .nla_len = 4 + 4,
+ .nla_type = NDA_FLAGS_EXT,
+ },
+ .val = -1, // should fail MASK validation
+};
+
+TEST(dump_extack)
+{
+ int netlink_sock;
+ int i, cnt, ret;
+ char buf[8192];
+ int one = 1;
+ ssize_t n;
+
+ netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ ASSERT_GE(netlink_sock, 0);
+
+ n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_CAP_ACK,
+ &one, sizeof(one));
+ ASSERT_EQ(n, 0);
+ n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+ ASSERT_EQ(n, 0);
+ n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK,
+ &one, sizeof(one));
+ ASSERT_EQ(n, 0);
+
+ /* Dump so many times we fill up the buffer */
+ cnt = 80;
+ for (i = 0; i < cnt; i++) {
+ n = send(netlink_sock, &dump_neigh_bad,
+ sizeof(dump_neigh_bad), 0);
+ ASSERT_EQ(n, sizeof(dump_neigh_bad));
+ }
+
+ /* Read out the ENOBUFS */
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ EXPECT_EQ(n, -1);
+ EXPECT_EQ(errno, ENOBUFS);
+
+ ret = NO_CTRL;
+ for (i = 0; i < cnt; i++) {
+ struct ext_ack ea = {};
+
+ n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT);
+ if (n < 0) {
+ ASSERT_GE(i, 10);
+ break;
+ }
+ ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr));
+
+ ret = nl_get_extack(buf, n, &ea);
+ /* Once we fill the buffer we'll see one ENOBUFS followed
+ * by a number of EBUSYs. Then the last recv() will finally
+ * trigger and complete the dump.
+ */
+ if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY))
+ continue;
+ EXPECT_EQ(ret, FOUND_EXTACK);
+ EXPECT_EQ(ea.err, EINVAL);
+ EXPECT_EQ(ea.attr_offs,
+ sizeof(struct nlmsghdr) + sizeof(struct ndmsg));
+ }
+ /* Make sure last message was a full DONE+extack */
+ EXPECT_EQ(ret, FOUND_EXTACK);
+}
static const struct {
struct nlmsghdr nlhdr;
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
index 6974474c26f3..38871bdef67f 100755
--- a/tools/testing/selftests/net/netns-name.sh
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -7,10 +7,12 @@ set -o pipefail
DEV=dummy-dev0
DEV2=dummy-dev1
ALT_NAME=some-alt-name
+NSIM_ADDR=2025
RET_CODE=0
cleanup() {
+ cleanup_netdevsim $NSIM_ADDR
cleanup_ns $NS $test_ns
}
@@ -25,12 +27,15 @@ setup_ns NS test_ns
#
# Test basic move without a rename
+# Use netdevsim because it has extra asserts for notifiers.
#
-ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns $test_ns ||
+
+nsim=$(create_netdevsim $NSIM_ADDR $NS)
+ip -netns $NS link set dev $nsim netns $test_ns ||
fail "Can't perform a netns move"
-ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
-ip -netns $test_ns link del $DEV || fail
+ip -netns $test_ns link show dev $nsim >> /dev/null ||
+ fail "Device not found after move"
+cleanup_netdevsim $NSIM_ADDR
#
# Test move with a conflict
@@ -78,6 +83,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
fail "Can still find alt-name after move"
ip -netns $test_ns link del $DEV || fail
+#
+# Test no conflict of the same name/ifindex in different netns
+#
+ip -netns $NS link add name $DEV index 100 type dummy || fail
+ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy ||
+ fail "Can create in netns without moving"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found"
+ip -netns $NS link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
+
echo -ne "$(basename $0) \t\t\t\t"
if [ $RET_CODE -eq 0 ]; then
echo "[ OK ]"
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index cd8a58097448..1f5227f3d64d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len)
name[0] = '\0';
rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen);
if (rc < 0)
- log_err_errno("setsockopt(SO_BINDTODEVICE)");
+ log_err_errno("getsockopt(SO_BINDTODEVICE)");
return rc;
}
@@ -535,7 +535,7 @@ static int set_freebind(int sd, int version)
break;
case AF_INET6:
if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
- log_err_errno("setsockopt(IPV6_FREEBIND");
+ log_err_errno("setsockopt(IPV6_FREEBIND)");
rc = -1;
}
break;
@@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, const char *_str,
sep++;
if (str_to_uint(sep, 1, pfx_len_max,
&args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
+ fprintf(stderr, "Invalid prefix length\n");
return 1;
}
} else {
@@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen,
}
}
- nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
+ nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1;
while (1) {
FD_ZERO(&rfds);
FD_SET(sd, &rfds);
@@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args)
sd = socket(args->version, args->type, args->protocol);
if (sd < 0) {
log_err_errno("Error opening socket");
- return -1;
+ return -1;
}
if (set_reuseaddr(sd) != 0)
@@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
* waiting to be told when to continue
*/
if (read(fd, &buf, sizeof(buf)) <= 0) {
- log_err_errno("Failed to read IPC status from status");
+ log_err_errno("Failed to read IPC status from pipe");
return 1;
}
if (!buf) {
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93d9d914529b..5c66421ab8aa 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -2,8 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
import time
+from os import system
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait
from lib.py import NetdevFamily, NetdevSimDev, ip
@@ -18,6 +19,160 @@ def lo_check(nf) -> None:
ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+def napi_list_check(nf) -> None:
+ with NetdevSimDev(queue_count=100) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100)
+
+ for q in [50, 0, 99]:
+ for i in range(4):
+ nsim.dfs_write("queue_reset", f"{q} {i}")
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100,
+ comment=f"queue count after reset queue {q} mode {i}")
+
+def napi_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at napi and device level.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "enabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ ip(f"link set dev {nsim.ifname} down")
+ ip(f"link set dev {nsim.ifname} up")
+
+ # verify if napi threaded is still set
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # check it is still not set for napi1
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # unset napi threaded and verify
+ nf.napi_set({'id': napi0_id, 'threaded': "disabled"})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+
+ # set threaded at device level
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+ # set napi threaded for napi0
+ nf.napi_set({'id': napi0_id, 'threaded': 1})
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+
+ # unset threaded at device level
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def dev_set_threaded(nf) -> None:
+ """
+ Test that verifies various cases of napi threaded
+ set and unset at device level using sysfs.
+ """
+ with NetdevSimDev(queue_count=2) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 2)
+
+ napi0_id = napis[0]['id']
+ napi1_id = napis[1]['id']
+
+ # set threaded
+ system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is set for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "enabled")
+ ksft_ne(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "enabled")
+ ksft_ne(napi1.get('pid'), None)
+
+ # unset threaded
+ system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded")
+
+ # check napi threaded is unset for both napis
+ napi0 = nf.napi_get({'id': napi0_id})
+ ksft_eq(napi0['threaded'], "disabled")
+ ksft_eq(napi0.get('pid'), None)
+ napi1 = nf.napi_get({'id': napi1_id})
+ ksft_eq(napi1['threaded'], "disabled")
+ ksft_eq(napi1.get('pid'), None)
+
+def nsim_rxq_reset_down(nf) -> None:
+ """
+ Test that the queue API supports resetting a queue
+ while the interface is down. We should convert this
+ test to testing real HW once more devices support
+ queue API.
+ """
+ with NetdevSimDev(queue_count=4) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} down")
+ for i in [0, 2, 3]:
+ nsim.dfs_write("queue_reset", f"1 {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -89,7 +244,8 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check,
+ dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
index 2f1508abc826..3fd1da2ec07d 100644
--- a/tools/testing/selftests/net/openvswitch/Makefile
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -2,7 +2,7 @@
top_srcdir = ../../../../..
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := openvswitch.sh
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index cc0bfae2bafa..b327d3061ed5 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -25,6 +25,7 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
+ tunnel_metadata ovs: test extraction of tunnel metadata
drop_reason drop: test drop reasons are emitted
psample psample: Sampling packets with psample"
@@ -113,13 +114,13 @@ ovs_add_dp () {
}
ovs_add_if () {
- info "Adding IF to DP: br:$2 if:$3"
- if [ "$4" != "-u" ]; then
- ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
- || return 1
+ info "Adding IF to DP: br:$3 if:$4 ($2)"
+ if [ "$5" != "-u" ]; then
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \
+ -t "$2" "$3" "$4" || return 1
else
python3 $ovs_base/ovs-dpctl.py add-if \
- -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+ -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err &
pid=$!
on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
fi
@@ -166,13 +167,15 @@ ovs_add_netns_and_veths () {
fi
if [ "$7" != "-u" ]; then
- ovs_add_if "$1" "$2" "$4" || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" || return 1
else
- ovs_add_if "$1" "$2" "$4" -u || return 1
+ ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1
fi
- [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \
- tcpdump -i any -s 65535
+ if [ $TRACING -eq 1 ]; then
+ ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553
+ ovs_wait grep -q "listening on any" ${ovs_dir}/stderr
+ fi
return 0
}
@@ -328,6 +331,11 @@ test_psample() {
# - drop packets and verify the right drop reason is reported
test_drop_reason() {
which perf >/dev/null 2>&1 || return $ksft_skip
+ which pahole >/dev/null 2>&1 || return $ksft_skip
+
+ ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys |
+ awk '/OPENVSWITCH/ { print $3; }' |
+ tr -d ,)
sbx_add "test_drop_reason" || return $?
@@ -371,7 +379,7 @@ test_drop_reason() {
"in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop'
ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20
- ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION
if [[ "$?" -ne "2" ]]; then
info "Did not detect expected drops: $?"
return 1
@@ -388,7 +396,7 @@ test_drop_reason() {
ovs_drop_record_and_run \
"test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000
- ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR
if [[ "$?" -ne "1" ]]; then
info "Did not detect expected explicit error drops: $?"
return 1
@@ -396,7 +404,7 @@ test_drop_reason() {
ovs_drop_record_and_run \
"test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000
- ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION
+ ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION
if [[ "$?" -ne "1" ]]; then
info "Did not detect expected explicit drops: $?"
return 1
@@ -749,6 +757,79 @@ test_upcall_interfaces() {
return 0
}
+ovs_add_kernel_tunnel() {
+ local sbxname=$1; shift
+ local ns=$1; shift
+ local tnl_type=$1; shift
+ local name=$1; shift
+ local addr=$1; shift
+
+ info "setting up kernel ${tnl_type} tunnel ${name}"
+ ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1
+ on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1"
+ ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1
+ ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1
+}
+
+test_tunnel_metadata() {
+ which arping >/dev/null 2>&1 || return $ksft_skip
+
+ sbxname="test_tunnel_metadata"
+ sbx_add "${sbxname}" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1
+
+ ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \
+ 172.31.110.1/24 || return 1
+
+ info "removing veth interface from openvswitch and setting IP"
+ ovs_del_if "${sbxname}" tdp0 left0 || return 1
+ ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1
+ ovs_sbx "${sbxname}" ip link set left0 up || return 1
+
+ info "setting up tunnel port in openvswitch"
+ ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1
+ on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0"
+ ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1
+ ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1
+
+ configs=$(echo '
+ 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\)
+ 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\)
+ 3 172.31.223.1/24 1020304 23 unset udpcsum flags\(csum\)
+ 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d')
+
+ while read -r i addr id ttl df csum flags; do
+ ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \
+ remote 172.31.110.2 id ${id} dstport 4789 \
+ ttl ${ttl} df ${df} ${csum} || return 1
+ done <<< "${configs}"
+
+ ovs_wait grep -q 'listening on upcall packet handler' \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+
+ info "sending arping"
+ for i in 1 2 3 4; do
+ ovs_sbx "${sbxname}" ip netns exec tns \
+ arping -I vxlan${i} 172.31.22${i}.2 -c 1 \
+ >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr
+ done
+
+ info "checking that received decapsulated packets carry correct metadata"
+ while read -r i addr id ttl df csum flags; do
+ arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha="
+ addrs="src=172.31.110.1,dst=172.31.110.2"
+ ports="tp_src=[0-9]*,tp_dst=4789"
+ tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)"
+
+ ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \
+ ${ovs_dir}/ovs-vxlan0.out || return 1
+ done <<< "${configs}"
+
+ return 0
+}
+
run_test() {
(
tname="$1"
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 8a0396bfaf99..b521e0dea506 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket):
elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
up.execute(msg)
else:
- print("Unkonwn cmd: %d" % msg["cmd"])
+ print("Unknown cmd: %d" % msg["cmd"])
except NetlinkError as ne:
raise ne
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..dbe0388c8512
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS := \
+ test-chachapoly.sh \
+ test-close-socket-tcp.sh \
+ test-close-socket.sh \
+ test-float.sh \
+ test-large-mtu.sh \
+ test-tcp.sh \
+ test.sh \
+# end of TEST_PROGS
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..88869c675d03
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+
+LAN_IP="11.11.11.11"
+
+create_ns() {
+ ip netns add peer${1}
+}
+
+setup_ns() {
+ MODE="P2P"
+
+ if [ ${1} -eq 0 ]; then
+ MODE="MP"
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+ ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+ ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p}
+ ip -n peer0 link set veth${p} up
+
+ ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p}
+ ip -n peer${p} link set veth${p} up
+ done
+ fi
+
+ ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+ ip -n peer${1} addr add ${2} dev tun${1}
+ # add a secondary IP to peer 1, to test a LAN behind a client
+ if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then
+ ip -n peer${1} addr add ${LAN_IP} dev tun${1}
+ ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0
+ fi
+ if [ -n "${3}" ]; then
+ ip -n peer${1} link set mtu ${3} dev tun${1}
+ fi
+ ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+ if [ "${PROTO}" == "UDP" ]; then
+ if [ ${1} -eq 0 ]; then
+ ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+ data64.key
+ done
+ else
+ RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE})
+ RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE})
+ LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE})
+ ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \
+ ${RADDR} ${RPORT}
+ ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+ data64.key
+ fi
+ else
+ if [ ${1} -eq 0 ]; then
+ (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+ ${ALG} 0 data64.key
+ done
+ }) &
+ sleep 5
+ else
+ ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+ data64.key
+ fi
+ fi
+}
+
+cleanup() {
+ # some ovpn-cli processes sleep in background so they need manual poking
+ killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+ # netns peer0 is deleted without erasing ifaces first
+ for p in $(seq 1 10); do
+ ip -n peer${p} link set tun${p} down 2>/dev/null || true
+ ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+ done
+ for p in $(seq 1 10); do
+ ip -n peer0 link del veth${p} 2>/dev/null || true
+ done
+ for p in $(seq 0 10); do
+ ip netns del peer${p} 2>/dev/null || true
+ done
+}
+
+if [ "${PROTO}" == "UDP" ]; then
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
+
+
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..42699740936d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_DST_CACHE=y
+CONFIG_INET=y
+CONFIG_NET=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_OVPN=m
+CONFIG_STREAM_PARSER=y
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..0f3babf19fd0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2387 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel accelerator
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+#include "kselftest.h"
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+ nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* libnl < 3.11.0 does not implement nla_get_uint() */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+ if (nla_len(attr) == sizeof(uint32_t))
+ return nla_get_u32(attr);
+ else
+ return nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+ KEY_DIR_IN = 0,
+ KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+struct nl_ctx {
+ struct nl_sock *nl_sock;
+ struct nl_msg *nl_msg;
+ struct nl_cb *nl_cb;
+
+ int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+ CMD_INVALID,
+ CMD_NEW_IFACE,
+ CMD_DEL_IFACE,
+ CMD_LISTEN,
+ CMD_CONNECT,
+ CMD_NEW_PEER,
+ CMD_NEW_MULTI_PEER,
+ CMD_SET_PEER,
+ CMD_DEL_PEER,
+ CMD_GET_PEER,
+ CMD_NEW_KEY,
+ CMD_DEL_KEY,
+ CMD_GET_KEY,
+ CMD_SWAP_KEYS,
+ CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+ enum ovpn_cmd cmd;
+
+ __u8 key_enc[KEY_LEN];
+ __u8 key_dec[KEY_LEN];
+ __u8 nonce[NONCE_LEN];
+
+ enum ovpn_cipher_alg cipher;
+
+ sa_family_t sa_family;
+
+ unsigned long peer_id;
+ unsigned long lport;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } remote;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } peer_ip;
+
+ bool peer_ip_set;
+
+ unsigned int ifindex;
+ char ifname[IFNAMSIZ];
+ enum ovpn_mode mode;
+ bool mode_set;
+
+ int socket;
+ int cli_sockets[MAX_PEERS];
+
+ __u32 keepalive_interval;
+ __u32 keepalive_timeout;
+
+ enum ovpn_key_direction key_dir;
+ enum ovpn_key_slot key_slot;
+ int key_id;
+
+ const char *peers_file;
+};
+
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+ int ret;
+
+ ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+ switch (ret) {
+ case -NLE_INTR:
+ fprintf(stderr,
+ "netlink received interrupt due to signal - ignoring\n");
+ break;
+ case -NLE_NOMEM:
+ fprintf(stderr, "netlink out of memory error\n");
+ break;
+ case -NLE_AGAIN:
+ fprintf(stderr,
+ "netlink reports blocking read - aborting wait\n");
+ break;
+ default:
+ if (ret)
+ fprintf(stderr, "netlink reports error (%d): %s\n",
+ ret, nl_geterror(-ret));
+ break;
+ }
+
+ return ret;
+}
+
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+ int flags)
+{
+ struct nl_ctx *ctx;
+ int err, ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->nl_sock = nl_socket_alloc();
+ if (!ctx->nl_sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+ ret = genl_connect(ctx->nl_sock);
+ if (ret) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_sock;
+ }
+
+ /* enable Extended ACK for detailed error reporting */
+ err = 1;
+ setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+ &err, sizeof(err));
+
+ ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+ if (ctx->ovpn_dco_id < 0) {
+ fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+ ctx->ovpn_dco_id);
+ goto err_free;
+ }
+
+ ctx->nl_msg = nlmsg_alloc();
+ if (!ctx->nl_msg) {
+ fprintf(stderr, "cannot allocate netlink message\n");
+ goto err_sock;
+ }
+
+ ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!ctx->nl_cb) {
+ fprintf(stderr, "failed to allocate netlink callback\n");
+ goto err_msg;
+ }
+
+ nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+ genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0);
+
+ if (ovpn->ifindex > 0)
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+ return ctx;
+nla_put_failure:
+err_msg:
+ nlmsg_free(ctx->nl_msg);
+err_sock:
+ nl_socket_free(ctx->nl_sock);
+err_free:
+ free(ctx);
+ return NULL;
+}
+
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+ return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ nl_socket_free(ctx->nl_sock);
+ nlmsg_free(ctx->nl_msg);
+ nl_cb_put(ctx->nl_cb);
+ free(ctx);
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+ struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+ int len = nlh->nlmsg_len;
+ struct nlattr *attrs;
+ int *ret = arg;
+ int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+ *ret = err->error;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return NL_STOP;
+
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+ if (len <= ack_len)
+ return NL_STOP;
+
+ attrs = (void *)((uint8_t *)nlh + ack_len);
+ len -= ack_len;
+
+ nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+ if (tb_msg[NLMSGERR_ATTR_MSG]) {
+ len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+ nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+ fprintf(stderr, "kernel error: %*s\n", len,
+ (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+ fprintf(stderr, "missing required nesting type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+ fprintf(stderr, "missing required attribute type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+ }
+
+ return NL_STOP;
+}
+
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_SKIP;
+}
+
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_STOP;
+}
+
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+ int status = 1;
+
+ nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+ &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+ if (cb)
+ nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+ nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+
+ while (status == 1)
+ ovpn_nl_recvmsgs(ctx);
+
+ if (status < 0)
+ fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+ strerror(-status), status);
+
+ return status;
+}
+
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+ int idx_enc, idx_dec, ret = -1;
+ unsigned char *ckey = NULL;
+ __u8 *bkey = NULL;
+ size_t olen = 0;
+ long ckey_len;
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open: %s\n", file);
+ return -1;
+ }
+
+ /* get file size */
+ fseek(fp, 0L, SEEK_END);
+ ckey_len = ftell(fp);
+ rewind(fp);
+
+ /* if the file is longer, let's just read a portion */
+ if (ckey_len > 256)
+ ckey_len = 256;
+
+ ckey = malloc(ckey_len);
+ if (!ckey)
+ goto err;
+
+ ret = fread(ckey, 1, ckey_len, fp);
+ if (ret != ckey_len) {
+ fprintf(stderr,
+ "couldn't read enough data from key file: %dbytes read\n",
+ ret);
+ goto err;
+ }
+
+ olen = 0;
+ ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+ if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ bkey = malloc(olen);
+ if (!bkey) {
+ fprintf(stderr, "cannot allocate binary key buffer\n");
+ goto err;
+ }
+
+ ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+ if (ret) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ if (olen < 2 * KEY_LEN + NONCE_LEN) {
+ fprintf(stderr,
+ "not enough data in key file, found %zdB but needs %dB\n",
+ olen, 2 * KEY_LEN + NONCE_LEN);
+ goto err;
+ }
+
+ switch (ctx->key_dir) {
+ case KEY_DIR_IN:
+ idx_enc = 0;
+ idx_dec = 1;
+ break;
+ case KEY_DIR_OUT:
+ idx_enc = 1;
+ idx_dec = 0;
+ break;
+ default:
+ goto err;
+ }
+
+ memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+ memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+ memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+ ret = 0;
+
+err:
+ fclose(fp);
+ free(bkey);
+ free(ckey);
+
+ return ret;
+}
+
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+ if (strcmp(cipher, "aes") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ else if (strcmp(cipher, "chachapoly") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else if (strcmp(cipher, "none") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_NONE;
+ else
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+ int in_dir;
+
+ in_dir = strtoll(dir, NULL, 10);
+ switch (in_dir) {
+ case KEY_DIR_IN:
+ case KEY_DIR_OUT:
+ ctx->key_dir = in_dir;
+ break;
+ default:
+ fprintf(stderr,
+ "invalid key direction provided. Can be 0 or 1 only\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+ struct sockaddr_storage local_sock = { 0 };
+ struct sockaddr_in6 *in6;
+ struct sockaddr_in *in;
+ int ret, s, sock_type;
+ size_t sock_len;
+
+ if (proto == IPPROTO_UDP)
+ sock_type = SOCK_DGRAM;
+ else if (proto == IPPROTO_TCP)
+ sock_type = SOCK_STREAM;
+ else
+ return -EINVAL;
+
+ s = socket(family, sock_type, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)&local_sock;
+ in->sin_family = family;
+ in->sin_port = htons(ctx->lport);
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_len = sizeof(*in);
+ break;
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)&local_sock;
+ in6->sin6_family = family;
+ in6->sin6_port = htons(ctx->lport);
+ in6->sin6_addr = in6addr_any;
+ sock_len = sizeof(*in6);
+ break;
+ default:
+ return -1;
+ }
+
+ int opt = 1;
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEADDR");
+ return ret;
+ }
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEPORT");
+ return ret;
+ }
+
+ if (family == AF_INET6) {
+ opt = 0;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+ sizeof(opt))) {
+ perror("failed to set IPV6_V6ONLY");
+ return -1;
+ }
+ }
+
+ ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+ if (ret < 0) {
+ perror("cannot bind socket");
+ goto err_socket;
+ }
+
+ ctx->socket = s;
+ ctx->sa_family = family;
+ return 0;
+
+err_socket:
+ close(s);
+ return -1;
+}
+
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ return ovpn_socket(ctx, family, IPPROTO_UDP);
+}
+
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ int ret;
+
+ ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+ if (ret < 0)
+ return ret;
+
+ ret = listen(ctx->socket, 10);
+ if (ret < 0) {
+ perror("listen");
+ close(ctx->socket);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+ socklen_t socklen;
+ int ret;
+
+ socklen = sizeof(ctx->remote);
+ ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+ if (ret < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ fprintf(stderr, "Connection received!\n");
+
+ switch (socklen) {
+ case sizeof(struct sockaddr_in):
+ case sizeof(struct sockaddr_in6):
+ break;
+ default:
+ fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+ close(ret);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return ret;
+err:
+ close(ctx->socket);
+ return ret;
+}
+
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+ socklen_t socklen;
+ int s, ret;
+
+ s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ socklen = sizeof(struct sockaddr_in);
+ break;
+ case AF_INET6:
+ socklen = sizeof(struct sockaddr_in6);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+ if (ret < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ fprintf(stderr, "connected\n");
+
+ ovpn->socket = s;
+
+ return 0;
+err:
+ close(s);
+ return ret;
+}
+
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+ if (!is_tcp) {
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+ ovpn->remote.in4.sin_addr.s_addr);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in4.sin_port);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+ sizeof(ovpn->remote.in6.sin6_addr),
+ &ovpn->remote.in6.sin6_addr);
+ NLA_PUT_U32(ctx->nl_msg,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ ovpn->remote.in6.sin6_scope_id);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in6.sin6_port);
+ break;
+ default:
+ fprintf(stderr,
+ "Invalid family for remote socket address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ if (ovpn->peer_ip_set) {
+ switch (ovpn->peer_ip.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+ ovpn->peer_ip.in4.sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+ sizeof(struct in6_addr),
+ &ovpn->peer_ip.in6.sin6_addr);
+ break;
+ default:
+ fprintf(stderr, "Invalid family for peer address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ ovpn->keepalive_interval);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ ovpn->keepalive_timeout);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ __u16 rport = 0, lport = 0;
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_PEER]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+ nla_len(attrs[OVPN_A_PEER]), NULL);
+
+ if (pattrs[OVPN_A_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+ if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+ fprintf(stderr, "\tsocket NetNS ID: %d\n",
+ nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+ char buf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+ lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+ rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+ char buf[INET6_ADDRSTRLEN];
+ int scope_id = -1;
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+ scope_id = nla_get_u32(p);
+ }
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+ scope_id);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+ inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+ void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+ fprintf(stderr, "\tKeepalive interval: %u sec\n",
+ nla_get_u32(p));
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+ fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+ fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+ fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+ fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+ fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+ fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+ fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+ fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+ fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+ int flags = 0, ret = -1;
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ flags = NLM_F_DUMP;
+
+ ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (ovpn->peer_id != PEER_ID_UNDEF) {
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+ }
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf, *key_dir;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_KEYCONF]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]),
+ nla_len(attrs[OVPN_A_KEYCONF]), NULL);
+
+ if (kattrs[OVPN_A_KEYCONF_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID]));
+ if (kattrs[OVPN_A_KEYCONF_SLOT]) {
+ fprintf(stderr, "\t- Slot: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ fprintf(stderr, "primary\n");
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ fprintf(stderr, "secondary\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT]));
+ break;
+ }
+ }
+ if (kattrs[OVPN_A_KEYCONF_KEY_ID])
+ fprintf(stderr, "\t- Key ID: %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID]));
+ if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) {
+ fprintf(stderr, "\t- Cipher: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) {
+ case OVPN_CIPHER_ALG_NONE:
+ fprintf(stderr, "none\n");
+ break;
+ case OVPN_CIPHER_ALG_AES_GCM:
+ fprintf(stderr, "aes-gcm\n");
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ fprintf(stderr, "chacha20poly1305\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG]));
+ break;
+ }
+ }
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+ struct nl_ctx *ctx;
+ struct nlattr *kc;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+ if (!ctx)
+ return -ENOMEM;
+
+ kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, kc);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+/* Helper function used to easily add attributes to a rtnl message */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) {
+ fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -EMSGSIZE;
+ }
+
+ rta = nlmsg_tail(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+
+ if (!data)
+ memset(RTA_DATA(rta), 0, alen);
+ else
+ memcpy(RTA_DATA(rta), data, alen);
+
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+ return 0;
+}
+
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+ int attr)
+{
+ struct rtattr *nest = nlmsg_tail(msg);
+
+ if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0)
+ return NULL;
+
+ return nest;
+}
+
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+ nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open RTNL socket */
+static int ovpn_rt_socket(void)
+{
+ int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+ return fd;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+ sizeof(sndbuf)) < 0) {
+ fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+ sizeof(rcvbuf)) < 0) {
+ fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Bind socket to Netlink subsystem */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+ struct sockaddr_nl local = { 0 };
+ socklen_t addr_len;
+
+ local.nl_family = AF_NETLINK;
+ local.nl_groups = groups;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+ __func__, errno);
+ return -errno;
+ }
+
+ addr_len = sizeof(local);
+ if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+ fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+ errno);
+ return -errno;
+ }
+
+ if (addr_len != sizeof(local)) {
+ fprintf(stderr, "%s: wrong address length %d\n", __func__,
+ addr_len);
+ return -EINVAL;
+ }
+
+ if (local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "%s: wrong address family %d\n", __func__,
+ local.nl_family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send Netlink message and run callback on reply (if specified) */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+ unsigned int groups, ovpn_parse_reply_cb cb,
+ void *arg_cb)
+{
+ int len, rem_len, fd, ret, rcv_len;
+ struct sockaddr_nl nladdr = { 0 };
+ struct nlmsgerr *err;
+ struct nlmsghdr *h;
+ char buf[1024 * 16];
+ struct iovec iov = {
+ .iov_base = payload,
+ .iov_len = payload->nlmsg_len,
+ };
+ struct msghdr nlmsg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+ nladdr.nl_pid = peer;
+ nladdr.nl_groups = groups;
+
+ payload->nlmsg_seq = time(NULL);
+
+ /* no need to send reply */
+ if (!cb)
+ payload->nlmsg_flags |= NLM_F_ACK;
+
+ fd = ovpn_rt_socket();
+ if (fd < 0) {
+ fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+ return -errno;
+ }
+
+ ret = ovpn_rt_bind(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = sendmsg(fd, &nlmsg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ /* prepare buffer to store RTNL replies */
+ memset(buf, 0, sizeof(buf));
+ iov.iov_base = buf;
+
+ while (1) {
+ /*
+ * iov_len is modified by recvmsg(), therefore has to be initialized before
+ * using it again
+ */
+ iov.iov_len = sizeof(buf);
+ rcv_len = recvmsg(fd, &nlmsg, 0);
+ if (rcv_len < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ fprintf(stderr, "%s: interrupted call\n",
+ __func__);
+ continue;
+ }
+ fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+ __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ if (rcv_len == 0) {
+ fprintf(stderr,
+ "%s: rtnl: socket reached unexpected EOF\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nlmsg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "%s: sender address length: %u (expected %zu)\n",
+ __func__, nlmsg.msg_namelen, sizeof(nladdr));
+ ret = -EIO;
+ goto out;
+ }
+
+ h = (struct nlmsghdr *)buf;
+ while (rcv_len >= (int)sizeof(*h)) {
+ len = h->nlmsg_len;
+ rem_len = len - sizeof(*h);
+
+ if (rem_len < 0 || len > rcv_len) {
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: truncated message\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ fprintf(stderr, "%s: malformed message: len=%d\n",
+ __func__, len);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_DONE) {
+ ret = 0;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ err = (struct nlmsgerr *)NLMSG_DATA(h);
+ if (rem_len < (int)sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "%s: ERROR truncated\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (err->error) {
+ fprintf(stderr, "%s: (%d) %s\n",
+ __func__, err->error,
+ strerror(-err->error));
+ ret = err->error;
+ goto out;
+ }
+
+ ret = 0;
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0)
+ ret = r;
+ }
+ goto out;
+ }
+
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0) {
+ ret = r;
+ goto out;
+ }
+ } else {
+ fprintf(stderr, "%s: RTNL: unexpected reply\n",
+ __func__);
+ }
+
+ rcv_len -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((uint8_t *)h +
+ NLMSG_ALIGN(len));
+ }
+
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: message truncated\n", __func__);
+ continue;
+ }
+
+ if (rcv_len) {
+ fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+ __func__, rcv_len);
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ close(fd);
+
+ return ret;
+}
+
+struct ovpn_link_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[256];
+};
+
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+ struct rtattr *linkinfo, *data;
+ struct ovpn_link_req req = { 0 };
+ int ret = -1;
+
+ fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+ ovpn->mode);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+ strlen(ovpn->ifname) + 1) < 0)
+ goto err;
+
+ linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+ if (!linkinfo)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+ strlen(OVPN_FAMILY_NAME) + 1) < 0)
+ goto err;
+
+ if (ovpn->mode_set) {
+ data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA);
+ if (!data)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+ &ovpn->mode, sizeof(uint8_t)) < 0)
+ goto err;
+
+ ovpn_nest_end(&req.n, data);
+ }
+
+ ovpn_nest_end(&req.n, linkinfo);
+
+ req.i.ifi_family = AF_PACKET;
+
+ ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+err:
+ return ret;
+}
+
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+ struct ovpn_link_req req = { 0 };
+
+ fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+ ovpn->ifindex);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_DELLINK;
+
+ req.i.ifi_family = AF_PACKET;
+ req.i.ifi_index = ovpn->ifindex;
+
+ return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+ void (*arg)__always_unused)
+{
+ return NL_OK;
+}
+
+struct mcast_handler_args {
+ const char *group;
+ int id;
+};
+
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct mcast_handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ int *ret = arg;
+
+ *ret = err->error;
+ return NL_STOP;
+}
+
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+ int *ret = arg;
+
+ *ret = 0;
+ return NL_STOP;
+}
+
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ char ifname[IF_NAMESIZE];
+ int *ret = arg;
+ __u32 ifindex;
+
+ fprintf(stderr, "received message from ovpn-dco\n");
+
+ *ret = -1;
+
+ if (!genlmsg_valid_hdr(nlh, 0)) {
+ fprintf(stderr, "invalid header\n");
+ return NL_STOP;
+ }
+
+ if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL)) {
+ fprintf(stderr, "received bogus data from ovpn-dco\n");
+ return NL_STOP;
+ }
+
+ if (!attrs[OVPN_A_IFINDEX]) {
+ fprintf(stderr, "no ifindex in this message\n");
+ return NL_STOP;
+ }
+
+ ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+ if (!if_indextoname(ifindex, ifname)) {
+ fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+ ifindex);
+ return NL_STOP;
+ }
+
+ switch (gnlh->cmd) {
+ case OVPN_CMD_PEER_DEL_NTF:
+ fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+ break;
+ case OVPN_CMD_KEY_SWAP_NTF:
+ fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+ break;
+ default:
+ fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+ return NL_STOP;
+ }
+
+ *ret = 0;
+ return NL_OK;
+}
+
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+ const char *group)
+{
+ struct nl_msg *msg;
+ struct nl_cb *cb;
+ int ret, ctrlid;
+ struct mcast_handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto out_fail_cb;
+ }
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ ret = -ENOBUFS;
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_auto_complete(sock, msg);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ ret = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+ while (ret > 0)
+ nl_recvmsgs(sock, cb);
+
+ if (ret == 0)
+ ret = grp.id;
+ nla_put_failure:
+ nl_cb_put(cb);
+ out_fail_cb:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int ovpn_listen_mcast(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int mcid, ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ ret = -ENOMEM;
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(sock, 8192, 8192);
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_free;
+ }
+
+ mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+ if (mcid < 0) {
+ fprintf(stderr, "cannot get mcast group: %s\n",
+ nl_geterror(mcid));
+ goto err_free;
+ }
+
+ ret = nl_socket_add_membership(sock, mcid);
+ if (ret) {
+ fprintf(stderr, "failed to join mcast group: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = 1;
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+ nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+ while (ret == 1) {
+ int err = nl_recvmsgs(sock, cb);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "cannot receive netlink message: (%d) %s\n",
+ err, nl_geterror(-err));
+ ret = -1;
+ break;
+ }
+ }
+
+ nl_cb_put(cb);
+err_free:
+ nl_socket_free(sock);
+ return ret;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage %s <command> <iface> [arguments..]\n",
+ cmd);
+ fprintf(stderr, "where <command> can be one of the following\n\n");
+
+ fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tmode:\n");
+ fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+ fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+ fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+
+ fprintf(stderr,
+ "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: TCP port to listen to\n");
+ fprintf(stderr,
+ "\tpeers_file: file containing one peer per line: Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+ fprintf(stderr,
+ "\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+ fprintf(stderr,
+ "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+ fprintf(stderr, "\traddr: peer IP address to connect to\n");
+ fprintf(stderr, "\trport: peer TCP port to connect to\n");
+ fprintf(stderr,
+ "\tkey_file: file containing the symmetric key for encryption\n");
+
+ fprintf(stderr,
+ "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+ fprintf(stderr, "\traddr: peer IP address\n");
+ fprintf(stderr, "\trport: peer UDP port\n");
+ fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+ fprintf(stderr,
+ "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeers_file: text file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n");
+
+ fprintf(stderr,
+ "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr,
+ "\tkeepalive_interval: interval for sending ping messages\n");
+ fprintf(stderr,
+ "\tkeepalive_timeout: time after which a peer is timed out\n");
+
+ fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+ fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+ fprintf(stderr,
+ "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to configure the key for\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+ fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+ fprintf(stderr,
+ "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+ fprintf(stderr,
+ "\tkey_dir: key direction, must 0 on one host and 1 on the other\n");
+ fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+ fprintf(stderr,
+ "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+ fprintf(stderr,
+ "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+ fprintf(stderr,
+ "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+ fprintf(stderr,
+ "* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+ const char *service, const char *vpnip)
+{
+ int ret;
+ struct addrinfo *result;
+ struct addrinfo hints = {
+ .ai_family = ovpn->sa_family,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+
+ if (host) {
+ ret = getaddrinfo(host, service, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on remote error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+ }
+
+ if (vpnip) {
+ ret = getaddrinfo(vpnip, NULL, &hints, &result);
+ if (ret) {
+ fprintf(stderr, "getaddrinfo on vpnip error: %s\n",
+ gai_strerror(ret));
+ return -1;
+ }
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+ ovpn->sa_family = result->ai_family;
+
+ ovpn->peer_ip_set = true;
+ }
+
+ ret = 0;
+out:
+ freeaddrinfo(result);
+ return ret;
+}
+
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+ const char *raddr, const char *rport,
+ const char *vpnip)
+{
+ ovpn->peer_id = strtoul(peer_id, NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+ int slot = strtoul(arg, NULL, 10);
+
+ if (errno == ERANGE || slot < 1 || slot > 2) {
+ fprintf(stderr, "key slot out of range\n");
+ return -1;
+ }
+
+ switch (slot) {
+ case 1:
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ break;
+ case 2:
+ ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+ break;
+ }
+
+ return 0;
+}
+
+static int ovpn_send_tcp_data(int socket)
+{
+ uint16_t len = htons(1000);
+ uint8_t buf[1002];
+ int ret;
+
+ memcpy(buf, &len, sizeof(len));
+ memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+ ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ fprintf(stdout, "Sent %u bytes over TCP socket\n", ret);
+
+ return ret > 0 ? 0 : ret;
+}
+
+static int ovpn_recv_tcp_data(int socket)
+{
+ uint8_t buf[1002];
+ uint16_t len;
+ int ret;
+
+ ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ if (ret < 2) {
+ fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&len, buf, sizeof(len));
+ len = ntohs(len);
+
+ fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n",
+ ret, len);
+
+ return 0;
+}
+
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+ if (!strcmp(cmd, "new_iface"))
+ return CMD_NEW_IFACE;
+
+ if (!strcmp(cmd, "del_iface"))
+ return CMD_DEL_IFACE;
+
+ if (!strcmp(cmd, "listen"))
+ return CMD_LISTEN;
+
+ if (!strcmp(cmd, "connect"))
+ return CMD_CONNECT;
+
+ if (!strcmp(cmd, "new_peer"))
+ return CMD_NEW_PEER;
+
+ if (!strcmp(cmd, "new_multi_peer"))
+ return CMD_NEW_MULTI_PEER;
+
+ if (!strcmp(cmd, "set_peer"))
+ return CMD_SET_PEER;
+
+ if (!strcmp(cmd, "del_peer"))
+ return CMD_DEL_PEER;
+
+ if (!strcmp(cmd, "get_peer"))
+ return CMD_GET_PEER;
+
+ if (!strcmp(cmd, "new_key"))
+ return CMD_NEW_KEY;
+
+ if (!strcmp(cmd, "del_key"))
+ return CMD_DEL_KEY;
+
+ if (!strcmp(cmd, "get_key"))
+ return CMD_GET_KEY;
+
+ if (!strcmp(cmd, "swap_keys"))
+ return CMD_SWAP_KEYS;
+
+ if (!strcmp(cmd, "listen_mcast"))
+ return CMD_LISTEN_MCAST;
+
+ return CMD_INVALID;
+}
+
+/* Send process to background and waits for signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting processes
+ */
+static void ovpn_waitbg(void)
+{
+ daemon(1, 1);
+ pause();
+}
+
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+ char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10];
+ char raddr[128], rport[10];
+ int n, ret;
+ FILE *fp;
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ ret = ovpn_new_iface(ovpn);
+ break;
+ case CMD_DEL_IFACE:
+ ret = ovpn_del_iface(ovpn);
+ break;
+ case CMD_LISTEN:
+ ret = ovpn_listen(ovpn, ovpn->sa_family);
+ if (ret < 0) {
+ fprintf(stderr, "cannot listen on TCP socket\n");
+ return ret;
+ }
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ int num_peers = 0;
+
+ while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ if (num_peers == MAX_PEERS) {
+ fprintf(stderr, "max peers reached!\n");
+ return -E2BIG;
+ }
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.sa_family = ovpn->sa_family;
+
+ peer_ctx.socket = ovpn_accept(ovpn);
+ if (peer_ctx.socket < 0) {
+ fprintf(stderr, "cannot accept connection!\n");
+ return -1;
+ }
+
+ /* store peer sockets to test TCP I/O */
+ ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+ NULL, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, true);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s\n",
+ peer_id, vpnip);
+ return ret;
+ }
+ num_peers++;
+ }
+
+ for (int i = 0; i < num_peers; i++) {
+ ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+ if (ret < 0)
+ break;
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_CONNECT:
+ ret = ovpn_connect(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect TCP socket\n");
+ return ret;
+ }
+
+ ret = ovpn_new_peer(ovpn, true);
+ if (ret < 0) {
+ fprintf(stderr, "cannot add peer to VPN\n");
+ close(ovpn->socket);
+ return ret;
+ }
+
+ if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+ ret = ovpn_new_key(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot set key\n");
+ return ret;
+ }
+ }
+
+ ret = ovpn_send_tcp_data(ovpn->socket);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_new_peer(ovpn, false);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_MULTI_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr,
+ lport, raddr, rport, vpnip)) == 6) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.socket = ovpn->socket;
+ peer_ctx.sa_family = AF_UNSPEC;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+ rport, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, false);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s %s %s\n",
+ peer_id, raddr, rport, vpnip);
+ return ret;
+ }
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_SET_PEER:
+ ret = ovpn_set_peer(ovpn);
+ break;
+ case CMD_DEL_PEER:
+ ret = ovpn_del_peer(ovpn);
+ break;
+ case CMD_GET_PEER:
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ fprintf(stderr, "List of peers connected to: %s\n",
+ ovpn->ifname);
+
+ ret = ovpn_get_peer(ovpn);
+ break;
+ case CMD_NEW_KEY:
+ ret = ovpn_new_key(ovpn);
+ break;
+ case CMD_DEL_KEY:
+ ret = ovpn_del_key(ovpn);
+ break;
+ case CMD_GET_KEY:
+ ret = ovpn_get_key(ovpn);
+ break;
+ case CMD_SWAP_KEYS:
+ ret = ovpn_swap_keys(ovpn);
+ break;
+ case CMD_LISTEN_MCAST:
+ ret = ovpn_listen_mcast();
+ break;
+ case CMD_INVALID:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+ int ret;
+
+ /* no args required for LISTEN_MCAST */
+ if (ovpn->cmd == CMD_LISTEN_MCAST)
+ return 0;
+
+ /* all commands need an ifname */
+ if (argc < 3)
+ return -EINVAL;
+
+ strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+ ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+ /* all commands, except NEW_IFNAME, needs an ifindex */
+ if (ovpn->cmd != CMD_NEW_IFACE) {
+ ovpn->ifindex = if_nametoindex(ovpn->ifname);
+ if (!ovpn->ifindex) {
+ fprintf(stderr, "cannot find interface: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ if (argc < 4)
+ break;
+
+ if (!strcmp(argv[3], "P2P")) {
+ ovpn->mode = OVPN_MODE_P2P;
+ } else if (!strcmp(argv[3], "MP")) {
+ ovpn->mode = OVPN_MODE_MP;
+ } else {
+ fprintf(stderr, "Cannot parse iface mode: %s\n",
+ argv[3]);
+ return -1;
+ }
+ ovpn->mode_set = true;
+ break;
+ case CMD_DEL_IFACE:
+ break;
+ case CMD_LISTEN:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+
+ ovpn->sa_family = AF_INET;
+ if (argc > 5 && !strcmp(argv[5], "ipv6"))
+ ovpn->sa_family = AF_INET6;
+ break;
+ case CMD_CONNECT:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->sa_family = AF_INET;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+ NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot parse remote peer data\n");
+ return -1;
+ }
+
+ if (argc > 6) {
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ ovpn->key_id = 0;
+ ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ ovpn->key_dir = KEY_DIR_OUT;
+
+ ret = ovpn_parse_key(argv[6], ovpn);
+ if (ret)
+ return -1;
+ }
+ break;
+ case CMD_NEW_PEER:
+ if (argc < 7)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ const char *vpnip = (argc > 7) ? argv[7] : NULL;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+ vpnip);
+ if (ret < 0)
+ return -1;
+ break;
+ case CMD_NEW_MULTI_PEER:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+ break;
+ case CMD_SET_PEER:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_DEL_PEER:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_GET_PEER:
+ ovpn->peer_id = PEER_ID_UNDEF;
+ if (argc > 3) {
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ }
+ break;
+ case CMD_NEW_KEY:
+ if (argc < 9)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return -1;
+
+ ovpn->key_id = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE || ovpn->key_id > 2) {
+ fprintf(stderr, "key ID out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_cipher(argv[6], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key_direction(argv[7], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key(argv[8], ovpn);
+ if (ret)
+ return -1;
+ break;
+ case CMD_DEL_KEY:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_GET_KEY:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_SWAP_KEYS:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_LISTEN_MCAST:
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct ovpn_ctx ovpn;
+ int ret;
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ memset(&ovpn, 0, sizeof(ovpn));
+ ovpn.sa_family = AF_UNSPEC;
+ ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+
+ ovpn.cmd = ovpn_parse_cmd(argv[1]);
+ if (ovpn.cmd == CMD_INVALID) {
+ fprintf(stderr, "Error: unknown command.\n\n");
+ usage(argv[0]);
+ return -1;
+ }
+
+ ret = ovpn_parse_cmd_args(&ovpn, argc, argv);
+ if (ret < 0) {
+ fprintf(stderr, "Error: invalid arguments.\n\n");
+ if (ret == -EINVAL)
+ usage(argv[0]);
+ return ret;
+ }
+
+ ret = ovpn_run_cmd(&ovpn);
+ if (ret)
+ fprintf(stderr, "Cannot execute command: %s (%d)\n",
+ strerror(-ret), ret);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
new file mode 100755
index 000000000000..ce2a2cb64f72
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+MTU="1500"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..e8acdc303307
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+ ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1))
+done
+
+# ping LAN behind client 1
+ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP}
+
+if [ "$FLOAT" == "1" ]; then
+ # make clients float..
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+ done
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+ done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for peers to timeout
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..e9773ddf875c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,6 @@
+1 10.10.1.1 1 10.10.1.2 1 5.5.5.2
+2 10.10.2.1 1 10.10.2.2 1 5.5.5.3
+3 10.10.3.1 1 10.10.3.2 1 5.5.5.4
+4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5
+5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6
+6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile
index 31cfb666ba8b..ff54641493e9 100644
--- a/tools/testing/selftests/net/packetdrill/Makefile
+++ b/tools/testing/selftests/net/packetdrill/Makefile
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_INCLUDES := ksft_runner.sh \
- defaults.sh \
- set_sysctls.py \
- ../../kselftest/ktap_helpers.sh
+TEST_INCLUDES := \
+ defaults.sh \
+ ksft_runner.sh \
+ set_sysctls.py \
+ ../../kselftest/ktap_helpers.sh \
+# end of TEST_INCLUDES
TEST_PROGS := $(wildcard *.pkt)
diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config
index 0237ed98f3c0..c4a19a785521 100644
--- a/tools/testing/selftests/net/packetdrill/config
+++ b/tools/testing/selftests/net/packetdrill/config
@@ -1,6 +1,6 @@
-CONFIG_IPV6=y
-CONFIG_HZ_1000=y
CONFIG_HZ=1000
+CONFIG_HZ_1000=y
+CONFIG_IPV6=y
CONFIG_NET_NS=y
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_FQ=y
diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh
index 1095a7b22f44..37edd3dc3b07 100755
--- a/tools/testing/selftests/net/packetdrill/defaults.sh
+++ b/tools/testing/selftests/net/packetdrill/defaults.sh
@@ -51,7 +51,8 @@ sysctl -q net.ipv4.tcp_pacing_ss_ratio=200
sysctl -q net.ipv4.tcp_pacing_ca_ratio=120
sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
-sysctl -q net.ipv4.tcp_fastopen=0x70403
+sysctl -q net.ipv4.tcp_fastopen=0x3
+# Use TFO_COOKIE in ksft_runner.sh for this key.
sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4
sysctl -q net.ipv4.tcp_syncookies=1
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index 4071c133f29e..b34e5cf0112e 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -3,27 +3,32 @@
source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh"
-readonly ipv4_args=('--ip_version=ipv4 '
- '--local_ip=192.168.0.1 '
- '--gateway_ip=192.168.0.1 '
- '--netmask_ip=255.255.0.0 '
- '--remote_ip=192.0.2.1 '
- '-D CMSG_LEVEL_IP=SOL_IP '
- '-D CMSG_TYPE_RECVERR=IP_RECVERR ')
-
-readonly ipv6_args=('--ip_version=ipv6 '
- '--mtu=1520 '
- '--local_ip=fd3d:0a0b:17d6::1 '
- '--gateway_ip=fd3d:0a0b:17d6:8888::1 '
- '--remote_ip=fd3d:fa7b:d17d::1 '
- '-D CMSG_LEVEL_IP=SOL_IPV6 '
- '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ')
+declare -A ip_args=(
+ [ipv4]="--ip_version=ipv4
+ --local_ip=192.168.0.1
+ --gateway_ip=192.168.0.1
+ --netmask_ip=255.255.0.0
+ --remote_ip=192.0.2.1
+ -D TFO_COOKIE=3021b9d889017eeb
+ -D TFO_COOKIE_ZERO=b7c12350a90dc8f5
+ -D CMSG_LEVEL_IP=SOL_IP
+ -D CMSG_TYPE_RECVERR=IP_RECVERR"
+ [ipv6]="--ip_version=ipv6
+ --mtu=1520
+ --local_ip=fd3d:0a0b:17d6::1
+ --gateway_ip=fd3d:0a0b:17d6:8888::1
+ --remote_ip=fd3d:fa7b:d17d::1
+ -D TFO_COOKIE=c1d1e9742a47a9bc
+ -D TFO_COOKIE_ZERO=82af1a8f9a205c34
+ -D CMSG_LEVEL_IP=SOL_IPV6
+ -D CMSG_TYPE_RECVERR=IPV6_RECVERR"
+)
if [ $# -ne 1 ]; then
ktap_exit_fail_msg "usage: $0 <script>"
exit "$KSFT_FAIL"
fi
-script="$1"
+script="$(basename $1)"
if [ -z "$(which packetdrill)" ]; then
ktap_skip_all "packetdrill not found in PATH"
@@ -31,16 +36,27 @@ if [ -z "$(which packetdrill)" ]; then
fi
declare -a optargs
+failfunc=ktap_test_fail
+
if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
optargs+=('--tolerance_usecs=14000')
+ failfunc=ktap_test_xfail
+fi
+
+ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2)
+if [[ -z $ip_versions ]]; then
+ ip_versions="ipv4 ipv6"
+elif [[ ! "$ip_versions" =~ ^ipv[46]$ ]]; then
+ ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions"
+ exit "$KSFT_FAIL"
fi
ktap_print_header
-ktap_set_plan 2
+ktap_set_plan $(echo $ip_versions | wc -w)
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
- && ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
- && ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
+for ip_version in $ip_versions; do
+ unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass $ip_version || $failfunc $ip_version
+done
ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
new file mode 100644
index 000000000000..38535701656e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking accept.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0...0.200 accept(3, ..., ...) = 4
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +.1 write(4, ..., 2000) = 2000
+ +0 > P. 1:2001(2000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
new file mode 100644
index 000000000000..3692ef102381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking connect.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+
+ +.1...0.200 connect(3, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
new file mode 100644
index 000000000000..657e42ca65b5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking read.
+
+--tolerance_usecs=10000
+--mss=1000
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+ +0...0.100 read(4, ..., 2000) = 2000
+ +.1 < P. 1:2001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 2001
+
+ +.1...0.200 read(4, ..., 2000) = 2000
+ +.1 < P. 2001:4001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001
+
+ +.1 < P. 4001:6001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 6001
+ +0...0.000 read(4, ..., 1000) = 1000
+ +0...0.000 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
new file mode 100644
index 000000000000..cec5a0725d95
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking write.
+--tolerance_usecs=10000
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10
+`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 50000
+ +0 accept(3, ..., ...) = 4
+
+// Kernel doubles our value -> sk->sk_sndbuf is set to 42000
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0
+ +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0
+
+// A write of 60000 does not block.
+ +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks
+
+ +.1 < . 1:1(0) ack 10001 win 50000
+
+ +.1 < . 1:1(0) ack 30001 win 50000
+
+// This ACK should wakeup the write(). An ACK of 35001 does not.
+ +.1 < . 1:1(0) ack 36001 win 50000
+
+// Reset to sysctls defaults.
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
new file mode 100644
index 000000000000..8514d6bdbb6d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+ +0 > S 0:0(0) <...>
+ +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+ +0 write(3, ..., 1000) = 1000
+ +0 > P. 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1001 win 257
+
+ +0 close(3) = 0
+ +0 > F. 1001:1001(0) ack 1
+ +0 < . 1:1(0) ack 1002 win 257
+
+ +0 < F. 1:1(0) ack 1002 win 257
+ +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
new file mode 100644
index 000000000000..04103134bd99
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test to make sure no RST is being sent when close()
+// is called on a socket with SYN_SENT state.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <...>
+
+// Application decideds to close the socket in SYN_SENT state
+// Make sure no RST is sent after close().
+ +0 close(3) = 0
+
+// Receive syn-ack to trigger the send side packet examination:
+// If a RESET were sent right after close(), it would have failed with
+// a mismatched timestamp.
+ +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7>
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
new file mode 100644
index 000000000000..5f3a2914213a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify behavior for the sequence: remote side sends FIN, then we close().
+// Since the remote side (client) closes first, we test our LAST_ACK code path.
+
+`./defaults.sh`
+
+// Initialize a server socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+// Client closes first.
+ +.01 < F. 1:1(0) ack 1 win 257
+ +0 > . 1:1(0) ack 2
+
+// App notices that client closed.
+ +0 read(4, ..., 1000) = 0
+
+// Then we close.
+ +.01 close(4) = 0
+ +0 > F. 1:1(0) ack 2
+
+// Client ACKs our FIN.
+ +.01 < . 2:2(0) ack 2 win 257
+
+// Verify that we send RST in response to any incoming segments
+// (because the kernel no longer has any record of this socket).
+ +.01 < . 2:2(0) ack 2 win 257
+ +0 > R 2:2(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt
new file mode 100644
index 000000000000..eef01d5f1118
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.1 < . 1:1(0) ack 1 win 32792
+
+
+ +0 accept(3, ..., ...) = 4
+ +0 < . 1:1001(1000) ack 1 win 32792
+ +0 > . 1:1(0) ack 1001
+ +0 read(4, ..., 1000) = 1000
+
+// resend the payload + a FIN
+ +0 < F. 1:1001(1000) ack 1 win 32792
+// Why do we have a delay and no dsack ?
+ +0~+.04 > . 1:1(0) ack 1002
+
+ +0 close(4) = 0
+
+// According to RFC 2525, section 2.17
+// we should _not_ send an RST here, because there was no data to consume.
+ +0 > F. 1:1(0) ack 1002
diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
new file mode 100644
index 000000000000..c790d0af635e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test various DSACK (RFC 2883) behaviors.
+
+--mss=1000
+
+`./defaults.sh`
+
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+// First SACK range.
+ +0 < P. 1001:2001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001>
+
+// Check SACK coalescing (contiguous sequence).
+ +0 < P. 2001:3001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001>
+
+// Check we have two SACK ranges for non contiguous sequences.
+ +0 < P. 4001:5001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001>
+
+// Three ranges.
+ +0 < P. 7001:8001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001>
+
+// DSACK (1001:3001) + SACK (6001:7001)
+ +0 < P. 1:6001(6000) ack 1 win 1024
+ +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001>
+
+// DSACK (7001:8001)
+ +0 < P. 6001:8001(2000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001>
+
+// DSACK for an older segment.
+ +0 < P. 1:1001(1000) ack 1 win 1024
+ +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
new file mode 100644
index 000000000000..643baf3267cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1).
+//
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1 # fully enabled
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response
++.002 ... 0.004 connect(4, ..., ...) = 0
+
+ +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+// Write 1 MSS.
++.002 write(4, ..., 1000) = 1000
+// Send 1 MSS with ect0.
+ +0 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
new file mode 100644
index 000000000000..f95b9b3c9fa1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. The large chunk itself should be packetized as
+// usual.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write another 10040B chunk with no coalescing options.
+ +0 send(4, ..., 10400, MSG_EOR) = 10400
+
+// Write a 2KB chunk. This chunk should not be appended to the packets created
+// the previous chunk.
+ +0 write(4, ..., 2000) = 2000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:20801(10800) ack 1
++.001 < . 1:1(0) ack 20801 win 514
+// This 2KB packet should be sent alone.
+ +0 > P. 20801:22801(2000) ack 1
++.001 < . 1:1(0) ack 22801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
new file mode 100644
index 000000000000..2ff66075288e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. Also, when packets are retransmitted, they
+// will not be coalesce into the same skb.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+ +0 > P. 10801:11201(400) ack 1
+ +0 > P. 11201:11601(400) ack 1
+ +0 > P. 11601:12001(400) ack 1
+ +0 > P. 12001:12401(400) ack 1
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
+ +0 > P. 13201:13601(400) ack 1
+ +0 > P. 13601:14001(400) ack 1
+ +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+ +0 > P. 14401:24401(10000) ack 1
+
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 11201 win 514
++.001 < . 1:1(0) ack 11601 win 514
++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop>
+// TCP should fill the hole but no coalescing should happen, and all
+// retransmissions should be sent out as individual packets.
+
+// Note : This is timeout based retransmit.
+// Do not put +0 here or flakes will come back.
++.004~+.008 > P. 12001:12401(400) ack 1
+
++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop>
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop>
++.001 < . 1:1(0) ack 14401 win 514
++.001 < . 1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
new file mode 100644
index 000000000000..77039c5aac39
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write a 400B chunk with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+ +0 > P. 10801:20801(10000) ack 1
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 20801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
new file mode 100644
index 000000000000..dd5a06250595
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk even though we have 10 back-to-back small
+// writes.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+ +0 > P. 10801:11201(400) ack 1
+ +0 > P. 11201:11601(400) ack 1
+ +0 > P. 11601:12001(400) ack 1
+ +0 > P. 12001:12401(400) ack 1
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
+ +0 > P. 13201:13601(400) ack 1
+ +0 > P. 13601:14001(400) ack 1
+ +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+ +0 > P. 14401:24401(10000) ack 1
+
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 11201 win 514
++.001 < . 1:1(0) ack 11601 win 514
++.001 < . 1:1(0) ack 12001 win 514
++.001 < . 1:1(0) ack 12401 win 514
++.001 < . 1:1(0) ack 12801 win 514
++.001 < . 1:1(0) ack 13201 win 514
++.001 < . 1:1(0) ack 13601 win 514
++.001 < . 1:1(0) ack 14001 win 514
++.001 < . 1:1(0) ack 14401 win 514
++.001 < . 1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
new file mode 100644
index 000000000000..0d3c8077e830
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we test a simple case where in-flight == ssthresh
+// all the way through recovery, so during fast recovery we send one segment
+// for each segment SACKed/ACKed.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 100ms
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop>
+// Enter fast recovery.
+ +0 > . 1:1001(1000) ack 1
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+// Write some more, which we will send 1 MSS at a time,
+// as in-flight segments are SACKed or ACKed.
+ +.01 write(4, ..., 7000) = 7000
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop>
+ +0 > . 12001:13001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop>
+ +0 > . 13001:14001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop>
+ +0 > . 14001:15001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop>
+ +0 > . 15001:16001(1000) ack 1
+
+ +.02 < . 1:1(0) ack 10001 win 320
+ +0 > P. 16001:17001(1000) ack 1
+// Leave fast recovery.
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+ +.03 < . 1:1(0) ack 12001 win 320
+ +.02 < . 1:1(0) ack 14001 win 320
+ +.02 < . 1:1(0) ack 16001 win 320
+ +.02 < . 1:1(0) ack 17001 win 320
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
new file mode 100644
index 000000000000..7842a10b6967
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4, and 11 to 16 are dropped.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write 20 data segments.
+ +0 write(4, ..., 20000) = 20000
+ +0 > P. 1:10001(10000) ack 1
+
+// Receive first DUPACK, entering PRR part
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+// Enter PRR CRB
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop>
+ +0 > . 12001:13001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop>
+ +0 > . 13001:14001(1000) ack 1
+// Enter PRR slow start
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop>
+ +0 > P. 14001:16001(2000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 > . 16001:17001(1000) ack 1
+// inflight reaches ssthresh, goes into packet conservation mode
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop>
+ +0 > . 17001:18001(1000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop>
+ +0 > . 18001:19001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
new file mode 100644
index 000000000000..b66d7644c3b6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4 are lost. The sender writes another 10 packets.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 20 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1,2,3,4
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+
+// Receiver ACKs all data.
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 10001 win 320
+
+// Writes another 10 packets, which the ssthresh*mss amount
+// should be sent right away
+ +.01 write(4, ..., 10000) = 10000
+ +0 > . 10001:17001(7000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
new file mode 100644
index 000000000000..8e87bfecabb5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we verify that the sender uses SACK info on an ACK
+// below snd_una.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 10ms
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001,4001:5001,7001:8001.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
+
++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop>
+ +0 > . 4001:7001(3000) ack 1
+
+ +0 write(4, ..., 10000) = 10000
+
+// The following ACK was reordered - delayed so that it arrives with
+// an ACK field below snd_una. Here we check that the newly-SACKed
+// 2MSS at 5001:7001 cause us to send out 2 more MSS.
++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop>
+ +0 > . 7001:8001(1000) ack 1
+ +0 > . 10001:11001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt
new file mode 100644
index 000000000000..32aff9bc4052
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving
+// SYN with data but without Fast Open cookie option.
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with
+// the data accepted.
+ +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 read(4, ..., 1024) = 1000
+
+// Data After SYN will be accepted too.
+ +0 < . 1001:2001(1000) ack 1 win 5840
+ +0 > . 1:1(0) ack 2001
+
+// Should change the implementation later to set the SYN flag as well.
+ +0 read(4, ..., 1024) = 1000
+ +0 write(4, ..., 1000) = 1000
+ +0 > P. 1:1001(1000) ack 2001
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt
new file mode 100644
index 000000000000..649997a58099
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN)
+
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 read(4, ..., 512) = 10
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt
new file mode 100644
index 000000000000..4a00e0d994f2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Server w/o TCP_FASTOPEN socket option
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE>
+
+// Data is ignored since TCP_FASTOPEN is not set on the listener
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable)
+
+// The above should block until ack comes in below.
+ +0 < . 1:31(30) ack 1 win 5840
+ +0 accept(3, ..., ...) = 4
+
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 read(4, ..., 512) = 30
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt
new file mode 100644
index 000000000000..345ed26ff7f8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test that TFO-enabled server would not respond SYN-ACK with any TFO option
+// when receiving a pure SYN-data. It should respond a pure SYN-ack.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+ +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 100
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 close(3) = 0
+
+// Test ECN-setup SYN with ECN disabled because this has happened in reality
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+ +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 100
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 close(3) = 0
+
+// Test ECN-setup SYN w/ ECN enabled
+ +0 `sysctl -q net.ipv4.tcp_ecn=2`
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6>
+ +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 100
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt
new file mode 100644
index 000000000000..98e6f84497cd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test TFO server with SYN that has TFO cookie and data.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 read(4, ..., 512) = 10
+ +0 write(4, ..., 100) = 100
+ +0 > P. 1:101(100) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt
new file mode 100644
index 000000000000..95b1047ffdd5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Basic TFO server test
+//
+// Test zero-payload packet w/ valid TFO cookie - a TFO socket will
+// still be created and accepted but read() will not return until a
+// later pkt with 10 byte.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+// A TFO socket is created and is writable.
+ +0 write(4, ..., 100) = 100
+ +0 > P. 1:101(100) ack 1
+ +0...0.300 read(4, ..., 512) = 10
+ +.3 < P. 1:11(10) ack 1 win 5840
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt
new file mode 100644
index 000000000000..f75efd51ed0c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// A reproducer case for a TFO SYNACK RTO undo bug in:
+// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit")
+// This sequence that tickles this bug is:
+// - Fast Open server receives TFO SYN with data, sends SYNACK
+// - (client receives SYNACK and sends ACK, but ACK is lost)
+// - server app sends some data packets
+// - (N of the first data packets are lost)
+// - server receives client ACK that has a TS ECR matching first SYNACK,
+// and also SACKs suggesting the first N data packets were lost
+// - server performs undo of SYNACK RTO, then immediately enters recovery
+// - buggy behavior in 794200d66273 then performed an undo that caused
+// the connection to be in a bad state, in CA_Open with retrans_out != 0
+
+// Check that outbound TS Val ticks are as we would expect with 1000 usec per
+// timestamp tick:
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE>
+ +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8>
+ +0 accept(3, ..., ...) = 4
+
+// Application writes more data
+ +.010 write(4, ..., 10000) = 10000
+ +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000>
+ +0 > P. 5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000>
+ +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }%
+
+ +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001>
+ +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010>
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }%
+
+ +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001>
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt
new file mode 100644
index 000000000000..c3cb0e8bdcf8
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test the Experimental Option
+//
+// SYN w/ FOEXP w/o cookie must generates SYN+ACK w/ FOEXP
+// w/ a valid cookie, and the cookie must be the same one
+// with one generated by IANA FO
+
+`./defaults.sh`
+
+// Request a TFO cookie by Experimental Option
+// This must generate the same TFO_COOKIE
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE>
+
+ +0 close(3) = 0
+
+// Test if FOEXP with a valid cookie creates a TFO socket
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 read(4, ..., 512) = 10
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt
new file mode 100644
index 000000000000..dc09f8d9a381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a FIN pkt with the ACK bit to a TFO socket.
+// The socket will go to TCP_CLOSE_WAIT state and data can be
+// read until the socket is closed, at which time a FIN will be sent.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// FIN is acked and the socket goes to TCP_CLOSE_WAIT state
+// in tcp_fin() called from tcp_data_queue().
+ +0 < F. 11:11(0) ack 1 win 32792
+ +0 > . 1:1(0) ack 12
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }%
+
+ +0 read(4, ..., 512) = 10
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 12
+ * > F. 1:1(0) ack 12
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt
new file mode 100644
index 000000000000..d5543672e2bd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send an ICMP host_unreachable pkt to a pending SYN_RECV req.
+//
+// If it's a TFO req, the ICMP error will cause it to switch
+// to TCP_CLOSE state but remains in the acceptor queue.
+
+--ip_version=ipv4
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// Out-of-window icmp is ignored but accounted.
+ +0 `nstat > /dev/null`
+ +0 < icmp unreachable [5000:6000(1000)]
+ +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null`
+
+// Valid ICMP unreach.
+ +0 < icmp unreachable host_unreachable [0:10(10)]
+
+// Unlike the non-TFO case, the req is still there to be accepted.
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state
+// to TCP_CLOSE
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// The 1st read will succeed and return the data in SYN
+ +0 read(4, ..., 512) = 10
+
+// The 2nd read will fail.
+ +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host)
+
+// But is no longer writable because it's in TCP_CLOSE state.
+ +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe)
+
+// inbound pkt will trigger RST because the socket has been moved
+// off the TCP hash tables.
+ +0 < . 1:1(0) ack 1 win 32792
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt
new file mode 100644
index 000000000000..040d5547ed80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket after it has been accepted.
+//
+// First read() will return all the data and this is consistent
+// with the non-TFO case. Second read will return -1
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// 1st read will return the data from SYN.
+// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV.
+ +0 < R. 11:11(0) win 32792
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// This one w/o ACK bit will cause the same effect.
+// +0 < R 11:11(0) win 32792
+// See Step 2 in tcp_validate_incoming().
+
+// found_ok_skb in tcp_recvmsg_locked()
+ +0 read(4, ..., 512) = 10
+
+// !copied && sk->sk_err -> sock_error(sk)
+ +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer)
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt
new file mode 100644
index 000000000000..7f9de6c66cbd
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket before it is accepted.
+//
+// The socket won't go away and after it's accepted the data
+// in the SYN pkt can still be read. But that's about all that
+// the acceptor can do with the socket.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+// 1st read will return the data from SYN.
+ +0 < R. 11:11(0) win 257
+
+// This one w/o ACK bit will cause the same effect.
+// +0 < R 11:11(0) win 257
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+ +0 read(4, ..., 512) = 10
+ +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer)
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt
new file mode 100644
index 000000000000..548a87701b5d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a TFO socket after it is accepted.
+//
+// The socket will change to TCP_CLOSE state with pending data so
+// write() will fail. Pending data can be still be read and close()
+// won't trigger RST if data is not read
+//
+// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets")
+// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk
+ +0 < R. 11:11(0) win 32792
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+ +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer)
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt
new file mode 100644
index 000000000000..20090bf77655
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Send a RST to a fully established socket with pending data before
+// it is accepted.
+//
+// The socket with pending data won't go away and can still be accepted
+// with data read. But it will be in TCP_CLOSE state.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Invalid cookie, so accept() fails.
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop>
+
+ +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable)
+
+// Complete 3WHS and send data and RST
+ +0 < . 1:1(0) ack 1 win 32792
+ +0 < . 1:11(10) ack 1 win 32792
+ +0 < R. 11:11(0) win 32792
+
+// A valid reset won't make the fully-established socket go away.
+// It's just that the acceptor will get a dead, unusable socket
+// in TCP_CLOSE state.
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+ +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer)
+ +0 read(4, ..., 512) = 10
+ +0 read(4, ..., 512) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt
new file mode 100644
index 000000000000..9f52d7de3436
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test the server cookie is generated by aes64 encoding of remote and local
+// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian).
+// This would produce a cookie of TFO_COOKIE like many other
+// tests (which the same key but set via sysctl).
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY,
+ "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Request a valid cookie TFO_COOKIE
+ +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop>
+ +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2>
+ +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001>
+ +0 > . 2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3>
+
+ +0 close(3) = 0
+
+// Restart the listener
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Test setting the key in the listen state, and produces an identical cookie
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY,
+ "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0
+
+ +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7>
+ +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000>
+ +0 read(4, ..., 8192) = 1000
+
+ +0 close(4) = 0
+ +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12>
+ +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101>
+ +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112>
+
+ +0 close(3) = 0
+
+// Restart the listener
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+// Test invalid key length (must be 16 bytes)
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument)
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument)
+
+// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0)
+ +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7>
+ +0 > S. 0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt
new file mode 100644
index 000000000000..e82e06da44c9
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Close a listener socket with pending TFO child.
+// This will trigger RST pkt to go out.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RST pkt is generated for each not-yet-accepted TFO child.
+// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect()
+// -> tcp_need_reset() is true for SYN_RECV
+ +0 close(3) = 0
+ +0 > R. 1:1(0) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt
new file mode 100644
index 000000000000..2a148bb14cbf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE>
+ +0 > S. 0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK>
+
+// sk->sk_state is TCP_SYN_RECV
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// tcp_disconnect() sets sk->sk_state to TCP_CLOSE
+ +0 connect(4, AF_UNSPEC, ...) = 0
+ +0 > R. 1:1(0) ack 11 win 65535
+ +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }%
+
+// connect() sets sk->sk_state to TCP_SYN_SENT
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
+ +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }%
+
+// tp->fastopen_rsk must be NULL
+ +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt
new file mode 100644
index 000000000000..09fb63f78a0e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Close a TFO socket with unread data.
+// This will trigger a RST pkt.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+ +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }%
+
+// data_was_unread == true in __tcp_close()
+ +0 close(4) = 0
+ +0 > R. 1:1(0) ack 11
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
index df49c67645ac..e13f0eee9795 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the client side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
index 04a5e2590c62..14dd5f813d50 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt
@@ -1,5 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Test TCP_INQ and TCP_CM_INQ on the server side.
+
+--mss=1000
+
`./defaults.sh
`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
new file mode 100644
index 000000000000..96b01eb5b7a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that doesn't support SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+ +0 write(4, ..., 15000) = 15000
+ +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320
+ +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+// We have one packet newly acked (1001:3001 were DUP-ACK'd)
+// So we revert state back to Open. Slow start cwnd from 10 to 11
+// and send 11 - 9 = 2 packets
+ +.01 < . 1:1(0) ack 3001 win 320
+ +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+ +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+ +0 > F. 15001:15001(0) ack 1
+ +.1 < F. 1:1(0) ack 15002 win 257
+ +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
new file mode 100644
index 000000000000..642da51ec3a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that supports SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+ +0 write(4, ..., 15000) = 15000
+ +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+ +.01 < . 1:1(0) ack 3001 win 320
+ +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+ +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+ +0 > F. 15001:15001(0) ack 1
+ +.1 < F. 1:1(0) ack 15002 win 257
+ +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
new file mode 100644
index 000000000000..7adae7a9ef4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// This is a test inspired by an Android client app using SSL. This
+// test verifies using TCP_NODELAY would save application latency
+// (Perhaps even better with TCP_NAGLE).
+//
+`./defaults.sh
+ethtool -K tun0 tso off gso off
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+// SSL handshake (resumed session)
+ +0 write(4, ..., 517) = 517
+ +0 > P. 1:518(517) ack 1
+ +.1 < . 1:1(0) ack 518 win 229
+
+ +0 < P. 1:144(143) ack 1 win 229
+ +0 > . 518:518(0) ack 144
+ +0 read(4, ..., 1000) = 143
+
+// Application POST header (51B) and body (2002B)
+ +0 write(4, ..., 51) = 51
+ +0 > P. 518:569(51) ack 144
+ +.03 write(4, ..., 2002) = 2002
+ +0 > . 569:1543(974) ack 144
+ +0 > P. 1543:2517(974) ack 144
+// Without disabling Nagle, this packet will not happen until the remote ACK.
+ +0 > P. 2517:2571(54) ack 144
+
+ +.1 < . 1:1(0) ack 2571 win 229
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
new file mode 100644
index 000000000000..fa9c01813996
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test the MSG_MORE flag will correctly corks the tiny writes
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+// Disable Nagle by default on this socket.
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle.
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40
+ +.21~+.215 > P. 1:41(40) ack 1
+ +.01 < . 1:1(0) ack 41 win 257
+
+// Test unsetting MSG_MORE releases the packet
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100
++.005 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160
+ +.01 sendmsg(4, {msg_name(...)=...,
+ msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}],
+ msg_flags=0}, MSG_MORE) = 495
++.008 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5
+ +0 > P. 41:801(760) ack 1
+ +.02 < . 1:1(0) ack 801 win 257
+
+
+// Test >MSS write will unleash MSS packets but hold on the remaining data.
+ +.1 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100
+ +0 > . 801:3801(3000) ack 1
++.003 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50
+
+ +.01 < . 1:1(0) ack 2801 win 257
+// Err... we relase the remaining right after the ACK? note that PUSH is reset
+ +0 > . 3801:3951(150) ack 1
+
+// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0
++.001 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1
++.002 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2
++.003 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3
++.004 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4
+ +.02 < . 1:1(0) ack 3951 win 257
+ +0 > . 3951:3961(10) ack 1
+ +.02 < . 1:1(0) ack 3961 win 257
+
+
+// Test the case a MSG_MORE send followed by a write flushes the data
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20
+ +.05 write(4, ..., 20) = 20
+ +0 > P. 3961:4001(40) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
new file mode 100644
index 000000000000..0ddec5f7dc1a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_CORK and TCP_NODELAY sockopt behavior
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+// Set TCP_CORK sockopt to hold small packets
+ +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+ +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Unset TCP_CORK should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0
+ +0 > P. 1:81(80) ack 1
+ +.01 < . 1:1(0) ack 81 win 257
+
+// Set TCP_CORK sockopt to hold small packets
+ +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+ +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+ +0 > P. 81:161(80) ack 1
+ +.01 < . 1:1(0) ack 161 win 257
+
+// Set MSG_MORE to hold small packets
+ +0 send(4, ..., 40, MSG_MORE) = 40
+ +.05 send(4, ..., 40, MSG_MORE) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+ +0 > . 161:241(80) ack 1
+ +.01 < . 1:1(0) ack 241 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
new file mode 100644
index 000000000000..09aabc775e80
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+// Test that a not-yet-accepted socket does not change
+// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets.
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+ +0 < . 1:1001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001>
+ +0 < . 1001:2001(1000) ack 1 win 257
+ +0 > . 1:1(0) ack 101001
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+// Test that ooo packets for accepted sockets do increase sk_rcvbuf
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:41001(39000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001>
+ +0 < . 41001:101001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001>
+
+ +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }%
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
new file mode 100644
index 000000000000..7e6bc5fb0c8d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh
+sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"`
+
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 2001:11001(9000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001>
+
+// check that ooo packet properly updates tcpi_rcv_mss
+ +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }%
+
+ +0 < . 11001:21001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001>
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
new file mode 100644
index 000000000000..3848b419e68c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:4001(4000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +1 < P. 5001:55001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW
+ +0 < P. 70001:80001(10000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001 win 5000
+
+ +0 read(4, ..., 100000) = 4000
+
+// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd
+ +0 < P. 4001:54001(50000) ack 1 win 257
+ +0 > . 1:1(0) ack 54001 win 0
+
+// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times.
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
new file mode 100644
index 000000000000..f575c0ff89da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+--mss=1000
+
+`./defaults.sh`
+
+ 0 `nstat -n`
+
+// Establish a connection.
+ +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 < P. 1:20001(20000) ack 1 win 257
+ +.04 > . 1:1(0) ack 20001 win 18000
+
+ +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 20001 win 18000
+
+ +0 read(4, ..., 20000) = 20000
+// A too big packet is accepted if the receive queue is empty
+ +0 < P. 20001:80001(60000) ack 1 win 257
+ +0 > . 1:1(0) ack 80001 win 0
+
diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
new file mode 100644
index 000000000000..47550df124ce
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// Test SYN+ACK RTX with 1s RTO.
+//
+`./defaults.sh
+ ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000`
+
+//
+// Test 1: TFO SYN+ACK
+//
+ 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0
+
+ +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop>
+ +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK>
+
+ +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
+
+
+//
+// Test 2: non-TFO SYN+ACK
+//
+ +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+// RTO must be capped to 1s
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+
+ +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK>
+ +0 accept(3, ..., ...) = 4
+ +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }%
+
+ +0 close(4) = 0
+ +0 close(3) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
new file mode 100644
index 000000000000..310ef31518da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that setsockopt calls that force a route refresh do not
+// cause problems matching SACKs with packets in the write queue.
+// This variant tests IP_TOS.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0
+ +0...0.010 connect(3, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+ +.01 write(3, ..., 5840) = 5840
+ +0 > P. 1:5841(5840) ack 1
+ +.01 < . 1:1(0) ack 5841 win 65535
+
+ +.01 write(3, ..., 5840) = 5840
+ +0 > P. 5841:11681(5840) ack 1
+ +.01 < . 1:1(0) ack 11681 win 65535
+
+ +.01 write(3, ..., 14600) = 14600
+ +0 > P. 11681:26281(14600) ack 1
+
+// Try the socket option that we know can force a route refresh.
+ +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0
+// Then revert to avoid routing/mangling/etc implications of that setting.
+ +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0
+
+// Verify that we do not retransmit the SACKed segments.
+ +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop>
+ +0 > . 13141:16061(2920) ack 1
+ +0 > P. 17521:20441(2920) ack 1
+ +.01 < . 1:1(0) ack 26281 win 65535
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
new file mode 100644
index 000000000000..f185e1ac57ea
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests non-FACK SACK with SACKs coming in the order
+// 2 6 8 3 9, to test what happens when we get a new SACKed range
+// (for packet 3) that is on the right of an existing SACKed range
+// (for packet 2).
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+ +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop>
+
+// 3 SACKed packets, so we enter Fast Recovery.
+ +0 > . 1:1001(1000) ack 1
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_lost == 6, tcpi_lost }%
+
+// SACK for 3001:4001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+ +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6
+
+// SACK for 9001:10001.
+ +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// ACK for 1:1001 as packets from t=0.303 arrive.
++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 4,tcpi_lost }%
+
+// ACK for 1:4001 as packets from t=0.310 arrive.
++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 3,tcpi_lost }%
+
+// ACK for 1:7001 as packets from t=0.320 arrive.
+ +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop>
+
+// ACK for all data as packets from t=0.403 arrive.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
new file mode 100644
index 000000000000..0093b4973934
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 3, and then a SACK for 4.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay the fast retransmit.
+ +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
+// RACK reordering timer
++.027 > . 1:1001(1000) ack 1
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 3001:4001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost
+assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001
+}%
+
+// SACK for 4001:5001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+// It uses the RFC3517 algorithm to mark 1:3001 lost
+// because >=3 higher-sequence packets are SACKed.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost
+}%
+
+// SACK for 8001:9001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop>
+
+// SACK for 9001:10001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop>
+ +0 > . 5001:6001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
new file mode 100644
index 000000000000..980a832dc81c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 5, and then a SACK for 6.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay a fast retransmit.
+ +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
++.027 > . 1:1001(1000) ack 1
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 5001:6001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 6, tcpi_lost
+assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3
+}%
+
+// SACK for 6001:7001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// SACK for 8001:9001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+
+// SACK for 9001:10001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop>
+ +0 > . 4001:5001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
new file mode 100644
index 000000000000..6740859a1360
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Simplest possible test of open() and then sendfile().
+// We write some zeroes into a file (since packetdrill expects payloads
+// to be all zeroes) and then open() the file, then use sendfile()
+// and verify that the correct number of zeroes goes out.
+
+`./defaults.sh
+/bin/rm -f /tmp/testfile
+/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 open("/tmp/testfile", O_RDONLY) = 5
+ +0 sendfile(4, 5, [0], 5) = 5
+ +0 > P. 1:6(5) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
new file mode 100644
index 000000000000..0cbd43253236
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize a server socket
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Connection should get accepted
+ +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <...>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+ +0 pipe([5, 6]) = 0
+ +0 < U. 1:101(100) ack 1 win 257 urg 100
+ +0 splice(4, NULL, 6, NULL, 99, 0) = 99
+ +0 splice(4, NULL, 6, NULL, 1, 0) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..8940726a3ec2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero
+// buffer length.
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+// Cache warmup: send a Fast Open cookie request
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop>
++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop>
++0 > . 1:1(0) ack 1
++0 close(3) = 0
++0 > F. 1:1(0) ack 1
++0 < F. 1:1(0) ack 2 win 92
++0 > . 2:2(0) ack 2
+
+// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(4) = 0
+
+// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(5, ..., ...) = 0
++0 sendto(5, NULL, 1, 0, ..., ...) = -1
++0 close(5) = 0
+
+// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6
++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(6, ..., ...) = 0
++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(6) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
new file mode 100644
index 000000000000..454441e7ecff
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we correctly skip zero-length IOVs.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}],
+ msg_flags=0}, 0) = 60
+ +0 > P. 1:61(60) ack 1
+ +.01 < . 1:1(0) ack 61 win 257
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}],
+ msg_flags=0}, MSG_ZEROCOPY) = 0
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}],
+ msg_flags=0}, MSG_ZEROCOPY) = 60
+ +0 > P. 61:121(60) ack 1
+ +.01 < . 1:1(0) ack 121 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..59f5903f285c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test kernel behavior with NULL as buffer pointer
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.2 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, NULL, 1000) = -1 EFAULT (Bad address)
+ +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+ +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
+
+ +0 < . 1:1001(1000) ack 1 win 200
+ +0 read(4, NULL, 1000) = -1 EFAULT (Bad address)
+ +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+ +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
new file mode 100644
index 000000000000..d7fdb43a8e89
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tcpi_last_data_recv for active session
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
++0 > . 1:1(0) ack 1
+
++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }%
+
++0 < . 1:1001(1000) ack 1 win 300
++0 > . 1:1(0) ack 1001
+
++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
new file mode 100644
index 000000000000..a9bcd46f6cb6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test rwnd limited time in tcp_info for client side.
+
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
+// Server advertises 0 receive window.
+ +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK>
+
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+
+// Make sure that initial rwnd limited time is 0.
+ +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }%
+
+// Receive window limited time starts here.
+ +0 write(3, ..., 1000) = 1000
+
+// Check that rwnd limited time in tcp_info is around 0.1s.
+ +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }%
+
+// Server opens the receive window.
+ +.1 < . 1:1(0) ack 1 win 2000
+
+// Check that rwnd limited time in tcp_info is around 0.2s.
+ +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }%
+
+ +0 > P. 1:1001(1000) ack 1
+
+// Server advertises a very small receive window.
+ +.03 < . 1:1(0) ack 1001 win 10
+
+// Receive window limited time starts again.
+ +0 write(3, ..., 1000) = 1000
+
+// Server opens the receive window again.
+ +.1 < . 1:1(0) ack 1001 win 2000
+// Check that rwnd limited time in tcp_info is around 0.3s
+// and busy time is 0.3 + 0.03 (server opened small window temporarily).
+ +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\
+ assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\
+}%
+
+ +0 > P. 1001:2001(1000) ack 1
+ +.02 < . 1:1(0) ack 2001 win 2000
+ +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
new file mode 100644
index 000000000000..f0de2acd0f8e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test send-buffer-limited time in tcp_info for client side.
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0
+ +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0
+
+ +.09...0.14 write(3, ..., 150000) = 150000
+
+ +.01 < . 1:1(0) ack 10001 win 10000
+
+ +.01 < . 1:1(0) ack 30001 win 10000
+
+// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB
+ +.01 < . 1:1(0) ack 70001 win 10000
+
+ +.02 < . 1:1(0) ack 95001 win 10000
+ +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \
+ assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \
+ assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
+
+// This ack frees up enough buffer so we are no longer
+// buffer limited (socket flag SOCK_NOSPACE is cleared)
+ +.02 < . 1:1(0) ack 150001 win 10000
+ +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\
+ assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\
+ assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
new file mode 100644
index 000000000000..2087ec0c746a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that tx timestamping sends timestamps only for
+// the last byte of each sendmsg.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+ +0 write(3, ..., 11000) = 11000
+ +0 > P. 1:10001(10000) ack 1
+ +.01 < . 1:1(0) ack 10001 win 4000
+ +0 > P. 10001:11001(1000) ack 1
+ +.01 < . 1:1(0) ack 11001 win 4000
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// chunk to have a timestamp key of 10999 (i.e., 11000 - 1).
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the last byte should be received at t=30ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
new file mode 100644
index 000000000000..876024a31110
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for partial writes (IPv4).
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have a partial write.
+ +0 write(3, ..., 10000) = 2964
+ +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200>
+ +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800>
+ +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200>
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at
+// t=30ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
new file mode 100644
index 000000000000..84d94780e6be
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for server-side (IPv4).
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// Write two 2KB chunks.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and
+// 3999 (i.e., 4000 - 1) respectively.
+ +0 write(4, ..., 2000) = 2000
+ +0 write(4, ..., 2000) = 2000
+ +0 > P. 1:2001(2000) ack 1
+ +0 > P. 2001:4001(2000) ack 1
+ +.01 < . 1:1(0) ack 2001 win 514
+ +.01 < . 1:1(0) ack 4001 win 514
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after write at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after write at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately
+// after that at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the second chunk should be received almost immediately
+// after that at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
new file mode 100644
index 000000000000..e61424a7bd0a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send FIN packet with correct TSval
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +1 close(4) = 0
+// Check that FIN TSval is updated properly, one second has passed since last sent packet.
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
new file mode 100644
index 000000000000..174ce9a1bfc0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we reject TS val updates on a packet with invalid ACK sequence
+
+`./defaults.sh
+`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+// bad packet with high tsval (its ACK sequence is above our sndnxt)
+ +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100>
+
+
+ +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+ +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
new file mode 100644
index 000000000000..2e3b3bb7493a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send RST packet with correct TSval
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+ +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
+
+ +1 close(4) = 0
+// Check that RST TSval is updated properly, one second has passed since last sent packet.
+ +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
new file mode 100644
index 000000000000..6882b8240a8a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+
+ +0 < S 0:0(0) win 0 <mss 1460>
+ +0 > S. 0:0(0) ack 1 <mss 1460>
+
+ +.1 < . 1:1(0) ack 1 win 65530
+ +0 accept(3, ..., ...) = 4
+
+ +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+ +0 write(4, ..., 24) = 24
+ +0 > P. 1:25(24) ack 1
+ +.1 < . 1:1(0) ack 25 win 65530
+ +0 %{ assert tcpi_probes == 0, tcpi_probes; \
+ assert tcpi_backoff == 0, tcpi_backoff }%
+
+// install a qdisc dropping all packets
+ +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+
+ +0 write(4, ..., 24) = 24
+ // When qdisc is congested we retry every 500ms
+ // (TCP_RESOURCE_PROBE_INTERVAL) and therefore
+ // we retry 6 times before hitting 3s timeout.
+ // First verify that the connection is alive:
++3 write(4, ..., 24) = 24
+
+ // Now verify that shortly after that the socket is dead:
++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
+
+ +0 %{ assert tcpi_probes == 6, tcpi_probes; \
+ assert tcpi_backoff == 0, tcpi_backoff }%
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
new file mode 100644
index 000000000000..2efe02bfba9c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.1 < . 1:1(0) ack 1 win 32792
+
+
+ +0 accept(3, ..., ...) = 4
+
+// Okay, we received nothing, and decide to close this idle socket.
+// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth
+// trying hard to cleanly close this flow, at the price of keeping
+// a TCP structure in kernel for about 1 minute !
+ +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+ +0 close(4) = 0
+
+ +0 > F. 1:1(0) ack 1
+ +.3~+.400 > F. 1:1(0) ack 1
+ +.3~+.400 > F. 1:1(0) ack 1
+ +.6~+.800 > F. 1:1(0) ack 1
+
+// We finally receive something from the peer, but it is way too late
+// Our socket vanished because TCP_USER_TIMEOUT was really small
+ +0 < . 1:2(1) ack 1 win 32792
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
new file mode 100644
index 000000000000..8bd60226ccfc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that established connections drop a segment without the ACK flag set.
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.01 < . 1:1(0) ack 1 win 20000
+ +0 accept(3, ..., ...) = 4
+
+// Receive a segment with no flags set, verify that it's not enqueued.
+ +.01 < - 1:1001(1000) win 20000
+ +0 ioctl(4, SIOCINQ, [0]) = 0
+
+// Receive a segment with ACK flag set, verify that it is enqueued.
+ +.01 < . 1:1001(1000) ack 1 win 20000
+ +0 ioctl(4, SIOCINQ, [1000]) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
index a82c8899d36b..0a0700afdaa3 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt
@@ -4,6 +4,8 @@
// send a packet with MSG_ZEROCOPY and receive the notification ID
// repeat and verify IDs are consecutive
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
index c01915e7f4a1..df91675d2991 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt
@@ -3,6 +3,8 @@
//
// send multiple packets, then read one range of all notifications.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
index 6509882932e9..2963cfcb14df 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Minimal client-side zerocopy test
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
index 2cd78755cb2a..ea0c2fa73c2d 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt
@@ -7,6 +7,8 @@
// First send on a closed socket and wait for (absent) notification.
// Then connect and send and verify that notification nr. is zero.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
index 7671c20e01cf..4df978a9b82e 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt
@@ -7,6 +7,9 @@
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLET is set. send another packet with
// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
index fadc480fdb7f..36b6edc4858c 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt
@@ -8,6 +8,9 @@
// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR
// is correctly fired only once, when EPOLLET is set. send another packet with
// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
index 5bfa0d1d2f4a..1bea6f3b4558 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt
@@ -8,6 +8,9 @@
// is correctly fired only once, when EPOLLONESHOT is set. send another packet
// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and
// confirm that EPOLLERR is correctly set.
+
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
index 4a73bbf46961..e27c21ff5d18 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt
@@ -8,6 +8,8 @@
// one will have no data in the initial send. On return 0 the
// zerocopy notification counter is not incremented. Verify this too.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
// Send a FastOpen request, no cookie yet so no data in SYN
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
index 36086c5877ce..b1fa77c77dfa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt
@@ -4,6 +4,8 @@
// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the
// kernel returns the notification ID.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh
./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
index 672f817faca0..2f5317d0a9fa 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt
@@ -7,6 +7,8 @@
// because each iovec element becomes a frag
// 3) the PSH bit is set on an skb when it runs out of fragments
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
index a9a1ac0aea4f..9d5272c6b207 100644
--- a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
+++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt
@@ -4,6 +4,8 @@
// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy
// packets of all sizes, including the smallest payload, 1B.
+--send_omit_free // do not reuse send buffers with zerocopy
+
`./defaults.sh`
0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 66be7699c72c..a3323c21f001 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -205,7 +205,6 @@
# Check that PMTU exceptions are created for both paths.
source lib.sh
-source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
@@ -1090,10 +1089,11 @@ cleanup() {
cleanup_all_ns
- ip link del veth_A-C 2>/dev/null
- ip link del veth_A-R1 2>/dev/null
- cleanup_del_ovs_internal
- cleanup_del_ovs_vswitchd
+ [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C
+ [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1
+ [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal
+ [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd
+
rm -f "$tmpoutfile"
}
diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c
new file mode 100644
index 000000000000..fab3b5c2e25d
--- /dev/null
+++ b/tools/testing/selftests/net/proc_net_pktgen.c
@@ -0,0 +1,690 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * proc_net_pktgen: kselftest for /proc/net/pktgen interface
+ *
+ * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net>
+ *
+ */
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "kselftest_harness.h"
+
+static const char ctrl_cmd_stop[] = "stop";
+static const char ctrl_cmd_start[] = "start";
+static const char ctrl_cmd_reset[] = "reset";
+
+static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
+
+static const char thr_cmd_add_loopback_0[] = "add_device lo@0";
+static const char thr_cmd_rm_loopback_0[] = "rem_device_all";
+
+static const char wrong_thr_cmd[] = "forsureawrongcommand";
+static const char legacy_thr_cmd[] = "max_before_softirq";
+
+static const char wrong_dev_cmd[] = "forsurewrongcommand";
+static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size";
+static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size ";
+static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0";
+static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1";
+static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100";
+static const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001";
+static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002";
+static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003";
+static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004";
+static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200";
+static const char dev_cmd_pkt_size_0[] = "pkt_size 300";
+static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1";
+static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20";
+static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21";
+static const char dev_cmd_imix_weights_3[] = "imix_weights";
+static const char dev_cmd_imix_weights_4[] = "imix_weights ";
+static const char dev_cmd_imix_weights_5[] = "imix_weights 0";
+static const char dev_cmd_imix_weights_6[] = "imix_weights 0,";
+static const char dev_cmd_debug_0[] = "debug 1";
+static const char dev_cmd_debug_1[] = "debug 0";
+static const char dev_cmd_frags_0[] = "frags 100";
+static const char dev_cmd_delay_0[] = "delay 100";
+static const char dev_cmd_delay_1[] = "delay 2147483647";
+static const char dev_cmd_rate_0[] = "rate 0";
+static const char dev_cmd_rate_1[] = "rate 100";
+static const char dev_cmd_ratep_0[] = "ratep 0";
+static const char dev_cmd_ratep_1[] = "ratep 200";
+static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1";
+static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2";
+static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3";
+static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4";
+static const char dev_cmd_clone_skb_0[] = "clone_skb 1";
+static const char dev_cmd_clone_skb_1[] = "clone_skb 0";
+static const char dev_cmd_count_0[] = "count 100";
+static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100";
+static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100";
+static const char dev_cmd_burst_0[] = "burst 0";
+static const char dev_cmd_node_0[] = "node 100";
+static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit";
+static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive";
+static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit";
+static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense";
+static const char dev_cmd_flag_0[] = "flag UDPCSUM";
+static const char dev_cmd_flag_1[] = "flag !UDPCSUM";
+static const char dev_cmd_flag_2[] = "flag nonsense";
+static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104";
+static const char dev_cmd_dst_0[] = "dst 101.102.103.104";
+static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204";
+static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000";
+static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104";
+static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204";
+static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06";
+static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16";
+static const char dev_cmd_clear_counters_0[] = "clear_counters";
+static const char dev_cmd_flows_0[] = "flows 100";
+static const char dev_cmd_spi_0[] = "spi 100";
+static const char dev_cmd_flowlen_0[] = "flowlen 100";
+static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1";
+static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2";
+static const char dev_cmd_mpls_0[] = "mpls 00000001";
+static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2";
+static const char dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f";
+static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10";
+static const char dev_cmd_vlan_id_0[] = "vlan_id 1";
+static const char dev_cmd_vlan_p_0[] = "vlan_p 1";
+static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1";
+static const char dev_cmd_vlan_id_1[] = "vlan_id 4096";
+static const char dev_cmd_svlan_id_0[] = "svlan_id 1";
+static const char dev_cmd_svlan_p_0[] = "svlan_p 1";
+static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1";
+static const char dev_cmd_svlan_id_1[] = "svlan_id 4096";
+static const char dev_cmd_tos_0[] = "tos 0";
+static const char dev_cmd_tos_1[] = "tos 0f";
+static const char dev_cmd_tos_2[] = "tos 0ff";
+static const char dev_cmd_traffic_class_0[] = "traffic_class f0";
+static const char dev_cmd_skb_priority_0[] = "skb_priority 999";
+
+FIXTURE(proc_net_pktgen) {
+ int ctrl_fd;
+ int thr_fd;
+ int dev_fd;
+};
+
+FIXTURE_SETUP(proc_net_pktgen) {
+ int r;
+ ssize_t len;
+
+ r = system("modprobe pktgen");
+ ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR);
+ ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR);
+ ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?");
+
+ len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0));
+ ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?");
+
+ self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR);
+ ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?");
+}
+
+FIXTURE_TEARDOWN(proc_net_pktgen) {
+ int ret;
+ ssize_t len;
+
+ ret = close(self->dev_fd);
+ EXPECT_EQ(ret, 0);
+
+ len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0));
+ EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0));
+
+ ret = close(self->thr_fd);
+ EXPECT_EQ(ret, 0);
+
+ ret = close(self->ctrl_fd);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_F(proc_net_pktgen, wrong_ctrl_cmd) {
+ for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->ctrl_fd, wrong_ctrl_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, ctrl_cmd) {
+ ssize_t len;
+
+ len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_stop));
+
+ len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1);
+
+ len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_start));
+
+ len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1);
+
+ len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset));
+ EXPECT_EQ(len, sizeof(ctrl_cmd_reset));
+
+ len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1);
+ EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1);
+}
+
+TEST_F(proc_net_pktgen, wrong_thr_cmd) {
+ for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->thr_fd, wrong_thr_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, legacy_thr_cmd) {
+ for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->thr_fd, legacy_thr_cmd, i);
+ if (i < (sizeof(legacy_thr_cmd) - 1)) {
+ /* incomplete command string */
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ /* complete command string without/with trailing '\0' */
+ EXPECT_EQ(len, i);
+ }
+ }
+}
+
+TEST_F(proc_net_pktgen, wrong_dev_cmd) {
+ for (int i = 0; i <= sizeof(wrong_dev_cmd); i++) {
+ ssize_t len;
+
+ len = write(self->dev_fd, wrong_dev_cmd, i);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+ }
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) {
+ ssize_t len;
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2));
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1);
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1);
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7));
+
+ len = write(self->dev_fd, dev_cmd_min_pkt_size_8, sizeof(dev_cmd_min_pkt_size_8));
+ EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_pkt_size) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_imix_weights) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0));
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1));
+
+ len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, E2BIG);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* with trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* without trailing '\0' */
+ len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1);
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_debug) {
+ ssize_t len;
+
+ /* debug on */
+ len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_debug_0));
+
+ /* debug off */
+ len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_debug_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_frags) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_frags_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_delay) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_delay_0));
+
+ len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_delay_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_rate) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_rate_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_ratep) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_ratep_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clone_skb) {
+ ssize_t len;
+
+ /* clone_skb on (gives EOPNOTSUPP on lo device) */
+ len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ /* clone_skb off */
+ len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_burst) {
+ ssize_t len;
+
+ /* burst off */
+ len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_burst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_node) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_node_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_0, sizeof(dev_cmd_xmit_mode_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2));
+
+ len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3));
+ EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flag) {
+ ssize_t len;
+
+ /* flag UDPCSUM on */
+ len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_0));
+
+ /* flag UDPCSUM off */
+ len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_1));
+
+ /* flag invalid */
+ len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_flag_2));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst6_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst6_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src6) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src6_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_dst_mac) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_src_mac) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_clear_counters) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flows) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flows_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_spi) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?");
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_flowlen) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_mpls) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_0));
+
+ len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_1));
+
+ len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_mpls_2));
+
+ len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3));
+ EXPECT_EQ(len, -1);
+ EXPECT_EQ(errno, E2BIG);
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_vlan_id) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0));
+
+ len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_svlan_id) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0));
+
+ len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_tos) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_0));
+
+ len = write(self->dev_fd, dev_cmd_tos_1, sizeof(dev_cmd_tos_1));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_1));
+
+ len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2));
+ EXPECT_EQ(len, sizeof(dev_cmd_tos_2));
+}
+
+
+TEST_F(proc_net_pktgen, dev_cmd_traffic_class) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0));
+}
+
+TEST_F(proc_net_pktgen, dev_cmd_skb_priority) {
+ ssize_t len;
+
+ len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0));
+ EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 84c524357075..ab8d8b7e6cb0 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -54,7 +54,7 @@
#include <unistd.h>
#include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#define RING_NUM_FRAMES 20
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index 6e4fef560873..067265b0a554 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -22,10 +22,6 @@
#define PORT_BASE 8000
-#ifndef __maybe_unused
-# define __maybe_unused __attribute__ ((__unused__))
-#endif
-
static __maybe_unused void pair_udp_setfilter(int fd)
{
/* the filter below checks for all of the following conditions that
diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c
index 404a2ce759ab..7caf3135448d 100644
--- a/tools/testing/selftests/net/psock_tpacket.c
+++ b/tools/testing/selftests/net/psock_tpacket.c
@@ -12,7 +12,7 @@
*
* Datapath:
* Open a pair of packet sockets and send resp. receive an a priori known
- * packet pattern accross the sockets and check if it was received resp.
+ * packet pattern across the sockets and check if it was received resp.
* sent correctly. Fanout in combination with RX_RING is currently not
* tested here.
*
@@ -22,6 +22,7 @@
* - TPACKET_V3: RX_RING
*/
+#undef NDEBUG
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
@@ -33,7 +34,6 @@
#include <ctype.h>
#include <fcntl.h>
#include <unistd.h>
-#include <bits/wordsize.h>
#include <net/ethernet.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
@@ -46,7 +46,7 @@
#include "psock_lib.h"
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef bug_on
# define bug_on(cond) assert(!(cond))
@@ -785,7 +785,7 @@ static int test_kernel_bit_width(void)
static int test_user_bit_width(void)
{
- return __WORDSIZE;
+ return sizeof(long) * 8;
}
static const char *tpacket_str[] = {
diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile
index 1803c39dbacb..762845cc973c 100644
--- a/tools/testing/selftests/net/rds/Makefile
+++ b/tools/testing/selftests/net/rds/Makefile
@@ -3,11 +3,16 @@
all:
@echo mk_build_dir="$(shell pwd)" > include.sh
-TEST_PROGS := run.sh \
- test.py
+TEST_PROGS := run.sh
-TEST_FILES := include.sh
+TEST_FILES := \
+ include.sh \
+ test.py \
+# end of TEST_FILES
-EXTRA_CLEAN := /tmp/rds_logs include.sh
+EXTRA_CLEAN := \
+ include.sh \
+ /tmp/rds_logs \
+# end of EXTRA_CLEAN
include ../../lib.mk
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 066efd30e294..5aad27a0d13a 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -22,7 +22,7 @@
#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
struct reuse_opts {
int reuseaddr[2];
@@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid)
ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
if (opts->reuseport[0] && opts->reuseport[1]) {
- EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened.");
} else {
EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
}
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
-#include <linux/dccp.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
@@ -21,10 +20,6 @@
#include <sys/socket.h>
#include <unistd.h>
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "tcp: failed to listen on receive port");
- else if (proto == SOCK_DCCP) {
- if (setsockopt(rcv_fds[i], SOL_DCCP,
- DCCP_SOCKOPT_SERVICE,
- &(int) {htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
- if (listen(rcv_fds[i], 10))
- error(1, errno, "dccp: failed to listen on receive port");
- }
}
}
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
if (fd < 0)
error(1, errno, "failed to create send socket");
- if (proto == SOCK_DCCP &&
- setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
- &(int){htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
if (i < 0)
error(1, errno, "epoll_wait failed");
- if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ if (proto == SOCK_STREAM) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
static void test_proto(int proto, const char *proto_str)
{
- if (proto == SOCK_DCCP) {
- int test_fd;
-
- test_fd = socket(AF_INET, proto, 0);
- if (test_fd < 0) {
- if (errno == ESOCKTNOSUPPORT) {
- fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
- return;
- } else
- error(1, errno, "failed to create a DCCP socket");
- }
- close(test_fd);
- }
-
fprintf(stderr, "%s IPv4 ... ", proto_str);
run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
@@ -271,7 +238,6 @@ int main(void)
{
test_proto(SOCK_DGRAM, "UDP");
test_proto(SOCK_STREAM, "TCP");
- test_proto(SOCK_DCCP, "DCCP");
fprintf(stderr, "SUCCESS\n");
return 0;
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index 65aea27d761c..b6634d6da3d6 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -24,7 +24,7 @@
#include <sys/resource.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct test_params {
int recv_family;
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index c9ba36aa688e..2ffd957ffb15 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -23,7 +23,7 @@
#include <unistd.h>
#include <numa.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static const int PORT = 8888;
diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh
new file mode 100755
index 000000000000..2db01ece0cc1
--- /dev/null
+++ b/tools/testing/selftests/net/route_hint.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test ensures directed broadcast routes use dst hint mechanism
+
+source lib.sh
+
+CLIENT_IP4="192.168.0.1"
+SERVER_IP4="192.168.0.2"
+BROADCAST_ADDRESS="192.168.0.255"
+
+setup() {
+ setup_ns CLIENT_NS SERVER_NS
+
+ ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}"
+
+ ip -net "${CLIENT_NS}" link set link0 up
+ ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0
+
+ ip -net "${SERVER_NS}" link set link1 up
+ ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1
+
+ ip netns exec "${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off
+ ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout"
+ ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs"
+ ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on
+}
+
+cleanup() {
+ ip -net "${SERVER_NS}" link del link1
+ cleanup_ns "${CLIENT_NS}" "${SERVER_NS}"
+}
+
+directed_bcast_hint_test()
+{
+ local rc=0
+
+ echo "Testing for directed broadcast route hint"
+
+ orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+ ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \
+ -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q
+ sleep 1
+ new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd')
+
+ res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc)
+
+ if [ "${res}" -lt 100 ]; then
+ echo "[ OK ]"
+ rc="${ksft_pass}"
+ else
+ echo "[FAIL] expected in_brd to be under 100, got ${res}"
+ rc="${ksft_fail}"
+ fi
+
+ return "${rc}"
+}
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit "${ksft_skip}"
+fi
+
+if [ ! -x "$(command -v bc)" ]; then
+ echo "SKIP: Could not run test without bc tool"
+ exit "${ksft_skip}"
+fi
+
+trap cleanup EXIT
+
+setup
+
+directed_bcast_hint_test
+exit $?
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index 4287a8529890..b200019b3c80 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -54,16 +54,16 @@ cleanup
echo 1 > /proc/sys/net/core/rps_default_mask
setup
-chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
echo 3 > /proc/sys/net/core/rps_default_mask
-chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0
ip link add name $VETH type veth peer netns $NETNS name $VETH
ip link set dev $VETH up
ip -n $NETNS link set dev $VETH up
-chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
-chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0
ip link del dev $VETH
ip netns del $NETNS
@@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
ip link add name $VETH type veth peer netns $NETNS name $VETH
-chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
-chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0
exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.py b/tools/testing/selftests/net/rtnetlink.py
new file mode 100755
index 000000000000..e9ad5e88da97
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily
+import socket
+
+IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01'
+
+def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None:
+ """
+ Verify that at least one interface has the IPv4 all-hosts multicast address.
+ At least the loopback interface should have this address.
+ """
+
+ addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True)
+
+ all_host_multicasts = [
+ addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST
+ ]
+
+ ksft_ge(len(all_host_multicasts), 1,
+ "No interface found with the IPv4 all-hosts multicast address")
+
+def main() -> None:
+ rtnl = RtnlAddrFamily()
+ ksft_run([dump_mcaddr_check], args=(rtnl, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 7f05b5f9b76f..248c2b91fe42 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -8,6 +8,7 @@ ALL_TESTS="
kci_test_polrouting
kci_test_route_get
kci_test_addrlft
+ kci_test_addrlft_route_cleanup
kci_test_promote_secondaries
kci_test_tc
kci_test_gre
@@ -21,6 +22,7 @@ ALL_TESTS="
kci_test_vrf
kci_test_encap
kci_test_macsec
+ kci_test_macsec_vlan
kci_test_ipsec
kci_test_ipsec_offload
kci_test_fdb_get
@@ -29,6 +31,8 @@ ALL_TESTS="
kci_test_bridge_parent_id
kci_test_address_proto
kci_test_enslave_bonding
+ kci_test_mngtmpaddr
+ kci_test_operstate
"
devdummy="test-dummy0"
@@ -44,6 +48,7 @@ check_err()
if [ $ret -eq 0 ]; then
ret=$1
fi
+ [ -n "$2" ] && echo "$2"
}
# same but inverted -- used when command must fail for test to pass
@@ -289,6 +294,17 @@ kci_test_route_get()
end_test "PASS: route get"
}
+check_addr_not_exist()
+{
+ dev=$1
+ addr=$2
+ if ip addr show dev $dev | grep -q $addr; then
+ return 1
+ else
+ return 0
+ fi
+}
+
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
@@ -296,9 +312,10 @@ kci_test_addrlft()
run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
- sleep 5
- run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
- if [ $? -eq 0 ]; then
+ slowwait 5 check_addr_not_exist "$devdummy" "10.23.11."
+ if [ $? -eq 1 ]; then
+ # troubleshoot the reason for our failure
+ run_cmd ip addr show dev "$devdummy"
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
return
@@ -307,8 +324,32 @@ kci_test_addrlft()
end_test "PASS: preferred_lft addresses have expired"
}
+kci_test_addrlft_route_cleanup()
+{
+ local ret=0
+ local test_addr="2001:db8:99::1/64"
+ local test_prefix="2001:db8:99::/64"
+
+ run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300
+ run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy"
+ run_cmd ip -6 addr del $test_addr dev "$devdummy"
+ run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy"
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: route not cleaned up when address with valid_lft deleted"
+ return 1
+ fi
+
+ end_test "PASS: route cleaned up when address with valid_lft deleted"
+}
+
kci_test_promote_secondaries()
{
+ run_cmd ifconfig "$devdummy"
+ if [ $ret -ne 0 ]; then
+ end_test "SKIP: ifconfig not installed"
+ return $ksft_skip
+ fi
promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries)
sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1
@@ -505,7 +546,7 @@ kci_test_encap_fou()
run_cmd_fail ip -netns "$testns" fou del port 9999
run_cmd ip -netns "$testns" fou del port 7777
if [ $ret -ne 0 ]; then
- end_test "FAIL: fou"s
+ end_test "FAIL: fou"
return 1
fi
@@ -559,6 +600,41 @@ kci_test_macsec()
end_test "PASS: macsec"
}
+# Test __dev_set_rx_mode call from dev_uc_add under addr_list_lock spinlock.
+# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance
+# lock.
+# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/
+kci_test_macsec_vlan()
+{
+ msname="test_macsec1"
+ vlanname="test_vlan1"
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+
+ run_cmd ip link set dev "$msname" up
+ ip link add link "$msname" name "$vlanname" type vlan id 1
+ ip link set dev "$vlanname" address 00:11:22:33:44:88
+ ip link set dev "$vlanname" up
+ run_cmd ip link del dev "$vlanname"
+ run_cmd ip link del dev "$msname"
+
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: macsec_vlan"
+ return 1
+ fi
+
+ end_test "PASS: macsec_vlan"
+}
+
#-------------------------------------------------------------------
# Example commands
# ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
@@ -671,6 +747,11 @@ kci_test_ipsec_offload()
sysfsf=$sysfsd/ipsec
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ esp4_offload_probed_default=false
+
+ if lsmod | grep -q esp4_offload; then
+ esp4_offload_probed_default=true
+ fi
if ! mount | grep -q debugfs; then
mount -t debugfs none /sys/kernel/debug/ &> /dev/null
@@ -764,6 +845,7 @@ EOF
fi
# clean up any leftovers
+ ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload
echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
@@ -1146,6 +1228,12 @@ do_test_address_proto()
local ret=0
local err
+ run_cmd_grep 'proto' ip address help
+ if [ $? -ne 0 ];then
+ end_test "SKIP: addr proto ${what}: iproute2 too old"
+ return $ksft_skip
+ fi
+
ip address add dev "$devdummy" "$addr3"
check_err $?
proto=$(address_get_proto "$addr3")
@@ -1239,6 +1327,132 @@ kci_test_enslave_bonding()
ip netns del "$testns"
}
+# Called to validate the addresses on $IFNAME:
+#
+# 1. Every `temporary` address must have a matching `mngtmpaddr`
+# 2. Every `mngtmpaddr` address must have some un`deprecated` `temporary`
+#
+# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait
+validate_mngtmpaddr()
+{
+ local dev=$1
+ local prefix=""
+ local addr_list=$(ip -j -n $testns addr show dev ${dev})
+ local temp_addrs=$(echo ${addr_list} | \
+ jq -r '.[].addr_info[] | select(.temporary == true) | .local')
+ local mng_prefixes=$(echo ${addr_list} | \
+ jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \
+ cut -d: -f1-4 | tr '\n' ' ')
+ local undep_prefixes=$(echo ${addr_list} | \
+ jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \
+ cut -d: -f1-4 | tr '\n' ' ')
+
+ # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr
+ for address in ${temp_addrs}; do
+ prefix=$(echo ${address} | cut -d: -f1-4)
+ if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then
+ check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!";
+ return 0
+ fi
+ done
+
+ # 2. All mngtmpaddr addresses must have a temporary address (not dep)
+ for prefix in ${mng_prefixes}; do
+ if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then
+ check_err 1 "FAIL: No undeprecated temporary in $prefix!";
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+kci_test_mngtmpaddr()
+{
+ local ret=0
+
+ setup_ns testns
+ if [ $? -ne 0 ]; then
+ end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ # 1. Create a dummy Ethernet interface
+ run_cmd ip -n $testns link add ${devdummy} type dummy
+ run_cmd ip -n $testns link set ${devdummy} up
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25
+ run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1
+
+ # 2. Create several mngtmpaddr addresses on that interface.
+ # with temp_*_lft configured to be pretty short (10 and 35 seconds
+ # for prefer/valid respectively)
+ for i in $(seq 1 9); do
+ run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy}
+ done
+
+ # 3. Confirm that a preferred temporary address exists for each mngtmpaddr
+ # address at all times, polling once per second for 30 seconds.
+ slowwait 30 validate_mngtmpaddr ${devdummy}
+
+ # 4. Delete each mngtmpaddr address, one at a time (alternating between
+ # deleting and merely un-mngtmpaddr-ing), and confirm that the other
+ # mngtmpaddr addresses still have preferred temporaries.
+ for i in $(seq 1 9); do
+ (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag=""
+ if (( $i % 2 == 0 )); then
+ run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy}
+ else
+ run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy}
+ fi
+ # the temp addr should be deleted
+ validate_mngtmpaddr ${devdummy}
+ done
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: mngtmpaddr add/remove incorrect"
+ else
+ end_test "PASS: mngtmpaddr add/remove correctly"
+ fi
+
+ ip netns del "$testns"
+ return $ret
+}
+
+kci_test_operstate()
+{
+ local ret=0
+
+ # Check that it is possible to set operational state during device
+ # creation and that it is preserved when the administrative state of
+ # the device is toggled.
+ run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UP" ip link show dev vx0
+ run_cmd ip link set dev vx0 down
+ run_cmd_grep "state DOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ # Check that it is possible to set the operational state of the device
+ # after creation.
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789
+ run_cmd_grep "state UNKNOWN" ip link show dev vx0
+ run_cmd ip link set dev vx0 state up
+ run_cmd_grep "state UP" ip link show dev vx0
+
+ run_cmd ip link del dev vx0
+
+ if [ "$ret" -ne 0 ]; then
+ end_test "FAIL: operstate"
+ return 1
+ fi
+
+ end_test "PASS: operstate"
+}
+
kci_test_rtnl()
{
local current_test
@@ -1272,6 +1486,8 @@ usage: ${0##*/} OPTS
EOF
}
+require_command jq
+
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
end_test "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh
new file mode 100755
index 000000000000..3f9780232bd6
--- /dev/null
+++ b/tools/testing/selftests/net/rtnetlink_notification.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking rtnetlink notification callpaths, and get as much
+# coverage as possible.
+#
+# set -e
+
+ALL_TESTS="
+ kci_test_mcast_addr_notification
+ kci_test_anycast_addr_notification
+"
+
+source lib.sh
+test_dev="test-dummy1"
+
+kci_test_mcast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor maddr > $tmpfile &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "mcast addr notification: iproute2 too old"
+ return $RET
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 4 line matches as follows.
+ # 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ # 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet mcast 224.0.0.1 scope global 
+ # Deleted 13: test-dummy1    inet6 mcast ff02::1 scope global 
+ match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile")
+ if [ "$match_result" -ne 4 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "mcast addr notification: Expected 4 matches, got $match_result"
+ return $RET
+}
+
+kci_test_anycast_addr_notification()
+{
+ RET=0
+ local tmpfile
+ local monitor_pid
+ local match_result
+
+ tmpfile=$(mktemp)
+ defer rm "$tmpfile"
+
+ ip monitor acaddress > "$tmpfile" &
+ monitor_pid=$!
+ defer kill_process "$monitor_pid"
+ sleep 1
+
+ if [ ! -e "/proc/$monitor_pid" ]; then
+ RET=$ksft_skip
+ log_test "anycast addr notification: iproute2 too old"
+ return "$RET"
+ fi
+
+ ip link add name "$test_dev" type dummy
+ check_err $? "failed to add dummy interface"
+ ip link set "$test_dev" up
+ check_err $? "failed to set dummy interface up"
+ sysctl -qw net.ipv6.conf."$test_dev".forwarding=1
+ ip link del dev "$test_dev"
+ check_err $? "Failed to delete dummy interface"
+ sleep 1
+
+ # There should be 2 line matches as follows.
+ # 9: dummy2 inet6 any fe80:: scope global
+ # Deleted 9: dummy2 inet6 any fe80:: scope global
+ match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile")
+ if [ "$match_result" -ne 2 ]; then
+ RET=$ksft_fail
+ fi
+ log_test "anycast addr notification: Expected 2 matches, got $match_result"
+ return "$RET"
+}
+
+#check for needed privileges
+if [ "$(id -u)" -ne 0 ];then
+ RET=$ksft_skip
+ log_test "need root privileges"
+ exit $RET
+fi
+
+require_command ip
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 16ac4df55fdb..b81ed0352d6c 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct options {
int so_timestamp;
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
index f02f1f95d227..a04dac0b8027 100644
--- a/tools/testing/selftests/net/sctp_hello.c
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -29,7 +29,6 @@ static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len
static int do_client(int argc, char *argv[])
{
struct sockaddr_storage ss;
- char buf[] = "hello";
int csk, ret, len;
if (argc < 5) {
@@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[])
set_addr(&ss, argv[3], argv[4], &len);
ret = connect(csk, (struct sockaddr *)&ss, len);
- if (ret < 0) {
- printf("failed to connect to peer\n");
+ if (ret < 0)
return -1;
- }
- ret = send(csk, buf, strlen(buf) + 1, 0);
- if (ret < 0) {
- printf("failed to send msg %d\n", ret);
- return -1;
- }
+ recv(csk, NULL, 0, 0);
close(csk);
return 0;
@@ -75,7 +68,6 @@ int main(int argc, char *argv[])
{
struct sockaddr_storage ss;
int lsk, csk, ret, len;
- char buf[20];
if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
printf("%s server|client ...\n", argv[0]);
@@ -125,11 +117,6 @@ int main(int argc, char *argv[])
return -1;
}
- ret = recv(csk, buf, sizeof(buf), 0);
- if (ret <= 0) {
- printf("failed to recv msg %d\n", ret);
- return -1;
- }
close(csk);
close(lsk);
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
index c854034b6aa1..667b211aa8a1 100755
--- a/tools/testing/selftests/net/sctp_vrf.sh
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -20,9 +20,9 @@ setup() {
modprobe sctp_diag
setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
- ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
- ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
- ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+ ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0
ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
@@ -62,17 +62,40 @@ setup() {
}
cleanup() {
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+ wait_client $CLIENT_NS1
+ wait_client $CLIENT_NS2
+ stop_server
cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
}
-wait_server() {
+start_server() {
local IFACE=$1
local CNT=0
- until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
- grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
- [ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+ ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE &
+ disown
+ until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do
+ [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; }
+ sleep 0.1
+ done
+}
+
+stop_server() {
+ local CNT=0
+
+ ip netns exec $SERVER_NS pkill sctp_hello
+ while ip netns exec $SERVER_NS ss -SaH | grep -q .; do
+ [ $((CNT++)) -eq 30 ] && break
+ sleep 0.1
+ done
+}
+
+wait_client() {
+ local CLIENT_NS=$1
+ local CNT=0
+
+ while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do
+ [ $((CNT++)) -eq 30 ] && break
sleep 0.1
done
}
@@ -81,14 +104,12 @@ do_test() {
local CLIENT_NS=$1
local IFACE=$2
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE 2>&1 >/dev/null &
- disown
- wait_server $IFACE || return $RET
+ start_server $IFACE || return $RET
timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
RET=$?
+ wait_client $CLIENT_NS
+ stop_server
return $RET
}
@@ -96,25 +117,21 @@ do_testx() {
local IFACE1=$1
local IFACE2=$2
- ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE1 2>&1 >/dev/null &
- disown
- wait_server $IFACE1 || return $RET
- ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
- $SERVER_PORT $IFACE2 2>&1 >/dev/null &
- disown
- wait_server $IFACE2 || return $RET
+ start_server $IFACE1 || return $RET
+ start_server $IFACE2 || return $RET
timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \
timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT
RET=$?
+ wait_client $CLIENT_NS1
+ wait_client $CLIENT_NS2
+ stop_server
return $RET
}
testup() {
- ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+ ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1
echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
@@ -123,7 +140,7 @@ testup() {
do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
echo "[PASS]"
- ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+ ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0
echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
echo "[PASS]"
@@ -160,7 +177,7 @@ testup() {
do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
- echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+ echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y "
do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
echo "[PASS]"
}
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
deleted file mode 100644
index 2070b57849de..000000000000
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
-readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
-readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
-readonly HARD_IRQS="$(< ${IRQ_PATH})"
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-netdev_check_for_carrier() {
- local -r dev="$1"
-
- for i in {1..5}; do
- carrier="$(cat /sys/class/net/${dev}/carrier)"
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "carrier not ready yet..." >&2
- sleep 1
- else
- echo "carrier ready" >&2
- break
- fi
- done
- echo "${carrier}"
-}
-
-# Assumes that there is no existing ipvlan device on the physical device
-setup_loopback_environment() {
- local dev="$1"
-
- # Fail hard if cannot turn on loopback mode for current NIC
- ethtool -K "${dev}" loopback on || exit 1
- sleep 1
-
- # Check for the carrier
- carrier=$(netdev_check_for_carrier ${dev})
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "setup_loopback_environment failed"
- exit 1
- fi
-}
-
-setup_macvlan_ns(){
- local -r link_dev="$1"
- local -r ns_name="$2"
- local -r ns_dev="$3"
- local -r ns_mac="$4"
- local -r addr="$5"
-
- ip link add link "${link_dev}" dev "${ns_dev}" \
- address "${ns_mac}" type macvlan
- exit_code=$?
- if [[ "${exit_code}" -ne 0 ]]; then
- echo "setup_macvlan_ns failed"
- exit $exit_code
- fi
-
- [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- ip link set dev "${ns_dev}" netns "${ns_name}"
- ip -netns "${ns_name}" link set dev "${ns_dev}" up
- if [[ -n "${addr}" ]]; then
- ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
- fi
-
- sleep 1
-}
-
-cleanup_macvlan_ns(){
- while (( $# >= 2 )); do
- ns_name="$1"
- ns_dev="$2"
- ip -netns "${ns_name}" link del dev "${ns_dev}"
- ip netns del "${ns_name}"
- shift 2
- done
-}
-
-cleanup_loopback(){
- local -r dev="$1"
-
- ethtool -K "${dev}" loopback off
- sleep 1
-
- # Check for the carrier
- carrier=$(netdev_check_for_carrier ${dev})
- if [[ "${carrier}" -ne 1 ]] ; then
- echo "setup_loopback_environment failed"
- exit 1
- fi
-}
-
-setup_interrupt() {
- # Use timer on host to trigger the network stack
- # Also disable device interrupt to not depend on NIC interrupt
- # Reduce test flakiness caused by unexpected interrupts
- echo 100000 >"${FLUSH_PATH}"
- echo 50 >"${IRQ_PATH}"
-}
-
-setup_ns() {
- # Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
- setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
- cleanup_macvlan_ns ${server_ns} server ${client_ns} client
-}
-
-setup() {
- setup_loopback_environment "${dev}"
- setup_interrupt
-}
-
-cleanup() {
- cleanup_loopback "${dev}"
-
- echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
- echo "${HARD_IRQS}" >"${IRQ_PATH}"
-}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
deleted file mode 100644
index 1f78a87f6f37..000000000000
--- a/tools/testing/selftests/net/setup_veth.sh
+++ /dev/null
@@ -1,44 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-readonly server_ns=$(mktemp -u server-XXXXXXXX)
-readonly client_ns=$(mktemp -u client-XXXXXXXX)
-
-setup_veth_ns() {
- local -r link_dev="$1"
- local -r ns_name="$2"
- local -r ns_dev="$3"
- local -r ns_mac="$4"
-
- [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
- ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
- ip -netns "${ns_name}" link set dev "${ns_dev}" up
-
- ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
-}
-
-setup_ns() {
- # Set up server_ns namespace and client_ns namespace
- ip link add name server type veth peer name client
-
- setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
- setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
-}
-
-cleanup_ns() {
- local ns_name
-
- for ns_name in ${client_ns} ${server_ns}; do
- [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
- done
-}
-
-setup() {
- # no global init setup step needed
- :
-}
-
-cleanup() {
- cleanup_ns
-}
diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c
index d87dd8d8d491..2a3f5c604f52 100644
--- a/tools/testing/selftests/net/sk_so_peek_off.c
+++ b/tools/testing/selftests/net/sk_so_peek_off.c
@@ -8,7 +8,7 @@
#include <sys/types.h>
#include <netinet/in.h>
#include <arpa/inet.h>
-#include "../kselftest.h"
+#include "kselftest.h"
static char *afstr(int af, int proto)
{
diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c
new file mode 100644
index 000000000000..1fdf61d6cd7f
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Open a tun device.
+ *
+ * [modifications: use IFF_NAPI_FRAGS, add sk filter]
+ *
+ * Expects the device to have been configured previously, e.g.:
+ * sudo ip tuntap add name tap1 mode tap
+ * sudo ip link set tap1 up
+ * sudo ip link set dev tap1 addr 02:00:00:00:00:01
+ * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ *
+ * And to avoid premature pskb_may_pull:
+ *
+ * sudo ethtool -K tap1 gro off
+ * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux'
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if.h>
+#include <linux/if_packet.h>
+#include <linux/if_tun.h>
+#include <linux/ipv6.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+static bool cfg_do_filter;
+static bool cfg_do_frags;
+static int cfg_dst_port = 8000;
+static char *cfg_ifname;
+
+static int tun_open(const char *tun_name)
+{
+ struct ifreq ifr = {0};
+ int fd, ret;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd == -1)
+ error(1, errno, "open /dev/net/tun");
+
+ ifr.ifr_flags = IFF_TAP;
+ if (cfg_do_frags)
+ ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS;
+
+ strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1);
+
+ ret = ioctl(fd, TUNSETIFF, &ifr);
+ if (ret)
+ error(1, ret, "ioctl TUNSETIFF");
+
+ return fd;
+}
+
+static void sk_set_filter(int fd)
+{
+ const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt);
+ const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest);
+
+ /* Filter UDP packets with destination port cfg_dst_port */
+ struct sock_filter filter_code[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+
+ struct sock_fprog filter = {
+ sizeof(filter_code) / sizeof(filter_code[0]),
+ filter_code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter)))
+ error(1, errno, "setsockopt attach filter");
+}
+
+static int raw_open(void)
+{
+ int fd;
+
+ fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP);
+ if (fd == -1)
+ error(1, errno, "socket raw (udp)");
+
+ if (cfg_do_filter)
+ sk_set_filter(fd);
+
+ return fd;
+}
+
+static void tun_write(int fd)
+{
+ const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 };
+ const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
+ struct tun_pi pi = {0};
+ struct ipv6hdr ip6h = {0};
+ struct udphdr uh = {0};
+ struct ethhdr eth = {0};
+ uint32_t payload;
+ struct iovec iov[5];
+ int ret;
+
+ pi.proto = htons(ETH_P_IPV6);
+
+ memcpy(eth.h_source, eth_src, sizeof(eth_src));
+ memcpy(eth.h_dest, eth_dst, sizeof(eth_dst));
+ eth.h_proto = htons(ETH_P_IPV6);
+
+ ip6h.version = 6;
+ ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t));
+ ip6h.nexthdr = IPPROTO_UDP;
+ ip6h.hop_limit = 8;
+ if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1)
+ error(1, errno, "inet_pton src");
+ if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1)
+ error(1, errno, "inet_pton src");
+
+ uh.source = htons(8000);
+ uh.dest = htons(cfg_dst_port);
+ uh.len = ip6h.payload_len;
+ uh.check = 0;
+
+ payload = htonl(0xABABABAB); /* Covered in IPv6 length */
+
+ iov[0].iov_base = &pi;
+ iov[0].iov_len = sizeof(pi);
+ iov[1].iov_base = &eth;
+ iov[1].iov_len = sizeof(eth);
+ iov[2].iov_base = &ip6h;
+ iov[2].iov_len = sizeof(ip6h);
+ iov[3].iov_base = &uh;
+ iov[3].iov_len = sizeof(uh);
+ iov[4].iov_base = &payload;
+ iov[4].iov_len = sizeof(payload);
+
+ ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0]));
+ if (ret <= 0)
+ error(1, errno, "writev");
+}
+
+static void raw_read(int fd)
+{
+ struct timeval tv = { .tv_usec = 100 * 1000 };
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ struct udphdr uh;
+ uint32_t payload[2];
+ int ret;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+ error(1, errno, "setsockopt rcvtimeo udp");
+
+ iov[0].iov_base = &uh;
+ iov[0].iov_len = sizeof(uh);
+
+ iov[1].iov_base = payload;
+ iov[1].iov_len = sizeof(payload);
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]);
+
+ ret = recvmsg(fd, &msg, 0);
+ if (ret <= 0)
+ error(1, errno, "read raw");
+ if (ret != sizeof(uh) + sizeof(payload[0]))
+ error(1, errno, "read raw: len=%d\n", ret);
+
+ fprintf(stderr, "raw recv: 0x%x\n", payload[0]);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "fFi:")) != -1) {
+ switch (c) {
+ case 'f':
+ cfg_do_filter = true;
+ printf("bpf filter enabled\n");
+ break;
+ case 'F':
+ cfg_do_frags = true;
+ printf("napi frags mode enabled\n");
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!cfg_ifname)
+ error(1, 0, "must specify tap interface name (-i)");
+}
+
+int main(int argc, char **argv)
+{
+ int fdt, fdr;
+
+ parse_opts(argc, argv);
+
+ fdr = raw_open();
+ fdt = tun_open(cfg_ifname);
+
+ tun_write(fdt);
+ raw_read(fdr);
+
+ if (close(fdt))
+ error(1, errno, "close tun");
+ if (close(fdr))
+ error(1, errno, "close udp");
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
+
diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh
new file mode 100755
index 000000000000..5da5066fb465
--- /dev/null
+++ b/tools/testing/selftests/net/skf_net_off.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NS="ns-$(mktemp -u XXXXXX)"
+
+cleanup() {
+ ip netns del $NS
+}
+
+ip netns add $NS
+trap cleanup EXIT
+
+ip -netns $NS link set lo up
+ip -netns $NS tuntap add name tap1 mode tap
+ip -netns $NS link set tap1 up
+ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01
+ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad
+ip netns exec $NS ethtool -K tap1 gro off
+
+# disable early demux, else udp_v6_early_demux pulls udp header into linear
+ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0
+
+echo "no filter"
+ip netns exec $NS ./skf_net_off -i tap1
+
+echo "filter, linear skb (-f)"
+ip netns exec $NS ./skf_net_off -i tap1 -f
+
+echo "filter, fragmented skb (-f) (-F)"
+ip netns exec $NS ./skf_net_off -i tap1 -f -F
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index e9fa14e10732..4740701f1a9a 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -9,7 +9,7 @@
#include <sys/socket.h>
#include <sys/sysinfo.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
FIXTURE(so_incoming_cpu)
{
diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c
new file mode 100644
index 000000000000..bc5841192aa6
--- /dev/null
+++ b/tools/testing/selftests/net/so_rcv_listener.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+#ifndef SO_RCVPRIORITY
+#define SO_RCVPRIORITY 82
+#endif
+
+struct options {
+ __u32 val;
+ int name;
+ int rcvname;
+ const char *host;
+ const char *service;
+} opt;
+
+static void __attribute__((noreturn)) usage(const char *bin)
+{
+ printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+ printf("Options:\n"
+ "\t\t-M val Test SO_RCVMARK\n"
+ "\t\t-P val Test SO_RCVPRIORITY\n"
+ "");
+ exit(EXIT_FAILURE);
+}
+
+static void parse_args(int argc, char *argv[])
+{
+ int o;
+
+ while ((o = getopt(argc, argv, "M:P:")) != -1) {
+ switch (o) {
+ case 'M':
+ opt.val = atoi(optarg);
+ opt.name = SO_MARK;
+ opt.rcvname = SO_RCVMARK;
+ break;
+ case 'P':
+ opt.val = atoi(optarg);
+ opt.name = SO_PRIORITY;
+ opt.rcvname = SO_RCVPRIORITY;
+ break;
+ default:
+ usage(argv[0]);
+ break;
+ }
+ }
+
+ if (optind != argc - 2)
+ usage(argv[0]);
+
+ opt.host = argv[optind];
+ opt.service = argv[optind + 1];
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+ int recv_fd = -1;
+ int ret_value = 0;
+ __u32 recv_val;
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(sizeof(__u32))];
+ char recv_buf[CMSG_SPACE(sizeof(__u32))];
+ struct iovec iov[1];
+ struct msghdr msg;
+ struct sockaddr_in recv_addr4;
+ struct sockaddr_in6 recv_addr6;
+
+ parse_args(argc, argv);
+
+ int family = strchr(opt.host, ':') ? AF_INET6 : AF_INET;
+
+ recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP);
+ if (recv_fd < 0) {
+ perror("Can't open recv socket");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val));
+ if (err < 0) {
+ perror("Recv setsockopt error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ if (family == AF_INET) {
+ memset(&recv_addr4, 0, sizeof(recv_addr4));
+ recv_addr4.sin_family = family;
+ recv_addr4.sin_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) {
+ perror("Invalid IPV4 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4));
+ } else {
+ memset(&recv_addr6, 0, sizeof(recv_addr6));
+ recv_addr6.sin6_family = family;
+ recv_addr6.sin6_port = htons(atoi(opt.service));
+
+ if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) {
+ perror("Invalid IPV6 address");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6));
+ }
+
+ if (err < 0) {
+ perror("Recv bind error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ iov[0].iov_base = recv_buf;
+ iov[0].iov_len = sizeof(recv_buf);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ err = recvmsg(recv_fd, &msg, 0);
+ if (err < 0) {
+ perror("Message receive error");
+ ret_value = -errno;
+ goto cleanup;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) {
+ recv_val = *(__u32 *)CMSG_DATA(cmsg);
+ printf("Received value: %u\n", recv_val);
+
+ if (recv_val != opt.val) {
+ fprintf(stderr, "Error: expected value: %u, got: %u\n",
+ opt.val, recv_val);
+ ret_value = -EINVAL;
+ }
+ goto cleanup;
+ }
+ }
+
+ fprintf(stderr, "Error: No matching cmsg received\n");
+ ret_value = -ENOMSG;
+
+cleanup:
+ if (recv_fd >= 0)
+ close(recv_fd);
+
+ return ret_value;
+}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 8457b7ccbc09..b76df1efc2ef 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt)
msg.msg_controllen = sizeof(control);
while (1) {
- const char *reason;
+ const char *reason = NULL;
ret = recvmsg(fdt, &msg, MSG_ERRQUEUE);
if (ret == -1 && errno == EAGAIN)
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index db1aeb8c5d1e..9e270548dad8 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,7 +7,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
-#include "../kselftest.h"
+#include "kselftest.h"
struct socket_testcase {
int domain;
@@ -39,6 +39,7 @@ static int run_tests(void)
{
char err_string1[ERR_STRING_SZ];
char err_string2[ERR_STRING_SZ];
+ const char *msg1, *msg2;
int i, err;
err = 0;
@@ -56,13 +57,13 @@ static int run_tests(void)
errno == -s->expect)
continue;
- strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
- strerror_r(errno, err_string2, ERR_STRING_SZ);
+ msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ);
+ msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ);
fprintf(stderr, "socket(%d, %d, %d) expected "
"err (%s) got (%s)\n",
s->domain, s->type, s->protocol,
- err_string1, err_string2);
+ msg1, msg2);
err = -1;
break;
@@ -70,12 +71,12 @@ static int run_tests(void)
close(fd);
if (s->expect < 0) {
- strerror_r(errno, err_string1, ERR_STRING_SZ);
+ msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ);
fprintf(stderr, "socket(%d, %d, %d) expected "
"success got err (%s)\n",
s->domain, s->type, s->protocol,
- err_string1);
+ msg1);
err = -1;
break;
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 02d617040793..a5e959a080bb 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -285,11 +285,6 @@ setup_hs()
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index 79fb81e63c59..a649dba3cb77 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -250,11 +250,6 @@ setup_hs()
eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
index 50563443a4ad..318487eda671 100755
--- a/tools/testing/selftests/net/srv6_end_flavors_test.sh
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -399,7 +399,7 @@ __get_srv6_rtcfg_id()
# Given the description of a router <id:op> as an input, the function returns
# the <op> token which represents the operation (e.g. End behavior with or
-# withouth flavors) configured for the node.
+# without flavors) configured for the node.
# Note that when the operation represents an End behavior with a list of
# flavors, the output is the ordered version of that list.
@@ -480,7 +480,7 @@ setup_rt_local_sids()
# all SIDs start with a common locator. Routes and SRv6 Endpoint
- # behavior instaces are grouped together in the 'localsid' table.
+ # behavior instances are grouped together in the 'localsid' table.
ip -netns "${nsname}" -6 rule \
add to "${LOCATOR_SERVICE}::/16" \
lookup "${LOCALSID_TABLE_ID}" prio 999
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
index 87e414cc417c..4bc135e5c22c 100755
--- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -245,10 +245,8 @@
# that adopted in the use cases already examined (of course, it is necessary to
# consider the different SIDs/C-SIDs).
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -376,32 +374,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -410,8 +394,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -420,28 +403,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -462,10 +429,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -497,7 +464,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -518,9 +485,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -596,7 +560,7 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -630,7 +594,7 @@ setup_rt_local_sids()
dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -668,8 +632,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -744,8 +708,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -791,11 +755,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -880,7 +839,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -903,7 +862,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -915,7 +874,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
index c79cb8ede17f..34b781a2ae74 100755
--- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -72,6 +72,9 @@
# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
# the selftest network.
#
+# In addition, every router interface connecting rt-x to rt-y is assigned an
+# IPv6 link-local address fe80::x:y/64.
+#
# Local SID/C-SID table
# =====================
#
@@ -287,10 +290,8 @@
# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
# and sends it to the host hs-1.
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
@@ -418,32 +419,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -452,15 +439,12 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
+ setup_ns "${nsname}"
- __create_namespace "${nsname}"
-
+ eval nsname=\${$(get_rtname "${rtid}")}
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -470,29 +454,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -512,10 +479,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -547,7 +514,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -557,6 +524,9 @@ setup_rt_networking()
ip -netns "${nsname}" addr \
add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+ ip -netns "${nsname}" addr \
+ add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad
+
ip -netns "${nsname}" link set "${devname}" up
done
@@ -631,7 +601,7 @@ set_end_x_nextcsid()
local rt="$1"
local adj="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
net_prefix="$(get_network_prefix "${rt}" "${adj}")"
lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
@@ -645,12 +615,33 @@ set_end_x_nextcsid()
nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
}
+set_end_x_ll_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ eval nsname=\${$(get_rtname "${rt}")}
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+ nh6_ll_addr="fe80::${adj}:${rt}"
+ oifname="veth-rt-${rt}-${adj}"
+
+ # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop
+ # address (note that "dev" is the dummy dum0 device chosen for the sake
+ # of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${nh6_ll_addr}" \
+ oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
set_underlay_sids_reachability()
{
local rt="$1"
local rt_neighs="$2"
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -685,12 +676,12 @@ setup_rt_local_sids()
local lcnode_func_prefix
local lcblock_prefix
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
set_underlay_sids_reachability "${rt}" "${rt_neighs}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -728,8 +719,8 @@ __setup_l3vpn()
local rtsrc_nsname
local rtdst_nsname
- rtsrc_nsname="$(get_rtname "${src}")"
- rtdst_nsname="$(get_rtname "${dst}")"
+ eval rtsrc_nsname=\${$(get_rtname "${src}")}
+ eval rtdst_nsname=\${$(get_rtname "${dst}")}
container="${LCBLOCK_ADDR}"
@@ -804,8 +795,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -851,11 +842,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -947,7 +933,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -970,7 +956,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -982,7 +968,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
@@ -1057,6 +1043,27 @@ host_vpn_tests()
check_and_log_hs_ipv4_connectivity 1 2
check_and_log_hs_ipv4_connectivity 2 1
+
+ # Setup the adjacencies in the SRv6 aware routers using IPv6 link-local
+ # addresses.
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_ll_nextcsid 3 4
+ set_end_x_ll_nextcsid 4 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+
+ # Restore the previous adjacencies.
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
}
__nextcsid_end_x_behavior_test()
@@ -1093,7 +1100,7 @@ rt_x_nextcsid_end_x_behavior_test()
local nsname
local ret
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
__nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
ret="$?"
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
index 28a775654b92..6a68c7eff1dc 100755
--- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -166,10 +166,8 @@
# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly VRF_TID=100
readonly VRF_DEVNAME="vrf-${VRF_TID}"
readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
@@ -248,32 +246,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -282,8 +266,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -292,29 +275,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
# the selftest is considered as "skipped".
@@ -334,10 +300,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -369,7 +335,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -387,9 +353,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -403,7 +366,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -432,7 +395,7 @@ setup_rt_local_sids()
dev "${VRF_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behavior instaces are grouped together in the 'localsid'
+ # Endpoint behavior instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule \
add to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -469,7 +432,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -516,8 +479,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -555,11 +518,6 @@ setup_hs()
ip netns exec "${rtname}" \
sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
-
ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
}
@@ -656,7 +614,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -679,7 +637,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -691,7 +649,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
index cb4177d41b21..0979b5316fdf 100755
--- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -116,10 +116,8 @@
# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
#
-# Kselftest framework requirement - SKIP code is 4.
-readonly ksft_skip=4
+source lib.sh
-readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
readonly DUMMY_DEVNAME="dum0"
readonly RT2HS_DEVNAME="veth-hs"
readonly HS_VETH_NAME="veth0"
@@ -199,32 +197,18 @@ test_command_or_ksft_skip()
fi
}
-get_nodename()
-{
- local name="$1"
-
- echo "${name}-${RDMSUFF}"
-}
-
get_rtname()
{
local rtid="$1"
- get_nodename "rt-${rtid}"
+ echo "rt_${rtid}"
}
get_hsname()
{
local hsid="$1"
- get_nodename "hs-${hsid}"
-}
-
-__create_namespace()
-{
- local name="$1"
-
- ip netns add "${name}"
+ echo "hs_${hsid}"
}
create_router()
@@ -233,8 +217,7 @@ create_router()
local nsname
nsname="$(get_rtname "${rtid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
create_host()
@@ -243,28 +226,12 @@ create_host()
local nsname
nsname="$(get_hsname "${hsid}")"
-
- __create_namespace "${nsname}"
+ setup_ns "${nsname}"
}
cleanup()
{
- local nsname
- local i
-
- # destroy routers
- for i in ${ROUTERS}; do
- nsname="$(get_rtname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
-
- # destroy hosts
- for i in ${HOSTS}; do
- nsname="$(get_hsname "${i}")"
-
- ip netns del "${nsname}" &>/dev/null || true
- done
+ cleanup_all_ns
# check whether the setup phase was completed successfully or not. In
# case of an error during the setup phase of the testing environment,
@@ -285,10 +252,10 @@ add_link_rt_pairs()
local nsname
local neigh_nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
- neigh_nsname="$(get_rtname "${neigh}")"
+ eval neigh_nsname=\${$(get_rtname "${neigh}")}
ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
type veth peer name "veth-rt-${neigh}-${rt}" \
@@ -320,7 +287,7 @@ setup_rt_networking()
local devname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -341,9 +308,6 @@ setup_rt_networking()
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
}
@@ -357,7 +321,7 @@ setup_rt_local_sids()
local nsname
local neigh
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
for neigh in ${rt_neighs}; do
devname="veth-rt-${rt}-${neigh}"
@@ -379,7 +343,7 @@ setup_rt_local_sids()
encap seg6local action End dev "${DUMMY_DEVNAME}"
# all SIDs for VPNs start with a common locator. Routes and SRv6
- # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # Endpoint behaviors instances are grouped together in the 'localsid'
# table.
ip -netns "${nsname}" -6 rule add \
to "${VPN_LOCATOR_SERVICE}::/16" \
@@ -407,7 +371,7 @@ __setup_rt_policy()
local policy=''
local n
- nsname="$(get_rtname "${encap_rt}")"
+ eval nsname=\${$(get_rtname "${encap_rt}")}
for n in ${end_rts}; do
policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
@@ -446,7 +410,7 @@ setup_decap()
local rt="$1"
local nsname
- nsname="$(get_rtname "${rt}")"
+ eval nsname=\${$(get_rtname "${rt}")}
# Local End.DX2 behavior
ip -netns "${nsname}" -6 route \
@@ -463,8 +427,8 @@ setup_hs()
local hsname
local rtname
- hsname="$(get_hsname "${hs}")"
- rtname="$(get_rtname "${rt}")"
+ eval hsname=\${$(get_hsname "${hs}")}
+ eval rtname=\${$(get_rtname "${rt}")}
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -486,11 +450,6 @@ setup_hs()
add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
-
- # disable the rp_filter otherwise the kernel gets confused about how
- # to route decap ipv4 packets.
- ip netns exec "${rtname}" \
- sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
}
# set an auto-generated mac address
@@ -508,7 +467,7 @@ set_mac_address()
local ifname="$4"
local nsname
- nsname=$(get_nodename "${nodename}")
+ eval nsname=\${${nodename}}
ip -netns "${nsname}" link set dev "${ifname}" down
@@ -532,7 +491,7 @@ set_host_l2peer()
local hssrc_name
local ipaddr
- hssrc_name="$(get_hsname "${hssrc}")"
+ eval hssrc_name=\${$(get_hsname "${hssrc}")}
if [ "${proto}" -eq 6 ]; then
ipaddr="${ipprefix}::${hsdst}"
@@ -562,7 +521,7 @@ setup_l2vpn()
local rtdst="${hsdst}"
# set fixed mac for source node and the neigh MAC address
- set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+ set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
@@ -570,7 +529,7 @@ setup_l2vpn()
# to the mac address of the remote peer (L2 VPN destination host).
# Otherwise, traffic coming from the source host is dropped at the
# ingress router.
- set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+ set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
# set the SRv6 Policies at the ingress router
setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
@@ -647,7 +606,7 @@ check_rt_connectivity()
local prefix
local rtsrc_nsname
- rtsrc_nsname="$(get_rtname "${rtsrc}")"
+ eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")}
prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
@@ -670,7 +629,7 @@ check_hs_ipv6_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
@@ -682,7 +641,7 @@ check_hs_ipv4_connectivity()
local hsdst="$2"
local hssrc_nsname
- hssrc_nsname="$(get_hsname "${hssrc}")"
+ eval hssrc_nsname=\${$(get_hsname "${hssrc}")}
ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
index 247c3b3ac1c9..9ec1c9b50e77 100644
--- a/tools/testing/selftests/net/tap.c
+++ b/tools/testing/selftests/net/tap.c
@@ -17,7 +17,7 @@
#include <linux/virtio_net.h>
#include <netinet/ip.h>
#include <netinet/udp.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static const char param_dev_tap_name[] = "xmacvtap0";
static const char param_dev_dummy_name[] = "xdummy0";
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
index 3605e38711cb..971cb6fa2d63 100644
--- a/tools/testing/selftests/net/tcp_ao/config
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -1,8 +1,8 @@
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_RMD160=y
CONFIG_CRYPTO_SHA1=y
-CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_NET_VRF=y
CONFIG_TCP_AO=y
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
index d418162d335f..93b61e9a36f1 100644
--- a/tools/testing/selftests/net/tcp_ao/connect-deny.c
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -4,6 +4,7 @@
#include "aolib.h"
#define fault(type) (inj == FAULT_ ## type)
+static volatile int sk_pair;
static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
union tcp_addr in_addr, uint8_t prefix,
@@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
const char *cnt_name, test_cnt cnt_expected,
fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
int lsk, err, sk = 0;
- time_t timeout;
lsk = test_listen_socket(this_ip_addr, port, 1);
@@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
if (cnt_name)
before_cnt = netstat_get_one(cnt_name, NULL);
- if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- err = test_wait_fd(lsk, timeout, 0);
+ err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
if (err == -ETIMEDOUT) {
+ sk_pair = err;
if (!fault(TIMEOUT))
- test_fail("timed out for accept()");
+ test_fail("%s: timed out for accept()", tst_name);
+ } else if (err == -EKEYREJECTED) {
+ if (!fault(KEYREJECT))
+ test_fail("%s: key was rejected", tst_name);
} else if (err < 0) {
- test_error("test_wait_fd()");
+ test_error("test_skpair_wait_poll()");
} else {
if (fault(TIMEOUT))
- test_fail("ready to accept");
+ test_fail("%s: ready to accept", tst_name);
sk = accept(lsk, NULL, NULL);
if (sk < 0) {
@@ -72,13 +76,13 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
}
synchronize_threads(); /* before counter checks */
- if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(lsk);
if (pwd)
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (!cnt_name)
goto out;
@@ -109,7 +113,7 @@ static void *server_fn(void *arg)
try_accept("Non-AO server + AO client", port++, NULL,
this_ip_dest, -1, 100, 100, 0,
- "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+ "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT);
try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0,
@@ -135,8 +139,9 @@ static void *server_fn(void *arg)
wrong_addr, -1, 100, 100, 0,
"TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+ /* Key rejected by the other side, failing short through skpair */
try_accept("Client: Wrong addr", port++, NULL,
- this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+ this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT);
try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 200, 100, 0,
@@ -163,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port,
uint8_t sndid, uint8_t rcvid,
test_cnt cnt_expected, fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
- time_t timeout;
+ struct tcp_counters cnt1, cnt2;
int sk, ret;
sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
@@ -174,16 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port,
if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
test_error("setsockopt(TCP_AO_ADD_KEY)");
- if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (pwd && test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair);
synchronize_threads(); /* before counter checks */
if (ret < 0) {
+ sk_pair = ret;
if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
test_ok("%s: connect() was prevented", tst_name);
} else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
@@ -202,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port,
else
test_ok("%s: connected", tst_name);
if (pwd && ret > 0) {
- if (test_get_tcp_ao_counters(sk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
+ } else if (pwd) {
+ test_tcp_counters_free(&cnt1);
}
out:
synchronize_threads(); /* close() */
@@ -241,6 +246,11 @@ static void *client_fn(void *arg)
try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+ /*
+ * XXX: The test doesn't increase any counters, see tcp_make_synack().
+ * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT
+ * but the price would be increased complexity of the tracer thread.
+ */
trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any,
port, 0, 100, 100);
try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
index f1d8d29e393f..340f00e979ea 100644
--- a/tools/testing/selftests/net/tcp_ao/connect.c
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -35,7 +35,7 @@ static void *client_fn(void *arg)
uint64_t before_aogood, after_aogood;
const size_t nr_packets = 20;
struct netstat *ns_before, *ns_after;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters ao1, ao2;
if (sk < 0)
test_error("socket()");
@@ -50,18 +50,18 @@ static void *client_fn(void *arg)
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &ao1))
+ test_error("test_get_tcp_counters()");
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("verify failed");
return NULL;
}
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &ao2))
+ test_error("test_get_tcp_counters()");
netstat_print_diff(ns_before, ns_after);
netstat_free(ns_before);
netstat_free(ns_after);
@@ -71,14 +71,14 @@ static void *client_fn(void *arg)
nr_packets, after_aogood, before_aogood);
return NULL;
}
- if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+ if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD))
return NULL;
test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu",
- before_aogood, ao1.ao_info_pkt_good,
- ao1.key_cnts[0].pkt_good,
- after_aogood, ao2.ao_info_pkt_good,
- ao2.key_cnts[0].pkt_good,
+ before_aogood, ao1.ao.ao_info_pkt_good,
+ ao1.ao.key_cnts[0].pkt_good,
+ after_aogood, ao2.ao.ao_info_pkt_good,
+ ao2.ao.key_cnts[0].pkt_good,
nr_packets);
return NULL;
}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
index a1614f0d8c44..85c1a1e958c6 100644
--- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -53,7 +53,7 @@ static void serve_interfered(int sk)
ssize_t test_quota = packet_size * packets_nr * 10;
uint64_t dest_unreach_a, dest_unreach_b;
uint64_t icmp_ignored_a, icmp_ignored_b;
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
bool counter_not_found;
struct netstat *ns_after, *ns_before;
ssize_t bytes;
@@ -61,16 +61,16 @@ static void serve_interfered(int sk)
ns_before = netstat_read();
dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
- if (test_get_tcp_ao_counters(sk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
bytes = test_server_run(sk, test_quota, 0);
ns_after = netstat_read();
netstat_print_diff(ns_before, ns_after);
dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
&counter_not_found);
- if (test_get_tcp_ao_counters(sk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
netstat_free(ns_before);
netstat_free(ns_after);
@@ -91,9 +91,9 @@ static void serve_interfered(int sk)
return;
}
#ifdef TEST_ICMPS_ACCEPT
- test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
#else
- test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
#endif
if (icmp_ignored_a >= icmp_ignored_b) {
test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
@@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d
static void send_interfered(int sk)
{
- const unsigned int timeout = TEST_TIMEOUT_SEC;
struct sockaddr_in6 src, dst;
socklen_t addr_sz;
@@ -409,7 +408,7 @@ static void send_interfered(int sk)
while (1) {
uint32_t rcv_nxt;
- if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+ if (test_client_verify(sk, packet_size, packets_nr)) {
test_fail("client: connection is broken");
return;
}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
index d4385b52c10b..69d9a7a05d5c 100644
--- a/tools/testing/selftests/net/tcp_ao/key-management.c
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port)
}
static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
- struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+ struct tcp_counters *a, struct tcp_counters *b)
{
unsigned int i;
- __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+ test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD);
for (i = 0; i < collection.nr_keys; i++) {
struct test_key *key = &collection.keys[i];
@@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server
rx_cnt_expected = key->used_on_server_tx;
}
- test_tcp_ao_key_counters_cmp(tst_name, a, b,
- rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
- sndid, rcvid);
+ test_assert_counters_key(tst_name, &a->ao, &b->ao,
+ rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+ sndid, rcvid);
}
- test_tcp_ao_counters_free(a);
- test_tcp_ao_counters_free(b);
+ test_tcp_counters_free(a);
+ test_tcp_counters_free(b);
test_ok("%s: passed counters checks", tst_name);
}
@@ -791,17 +791,17 @@ out:
}
static int start_server(const char *tst_name, unsigned int port, size_t quota,
- struct tcp_ao_counters *begin,
+ struct tcp_counters *begin,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters lsk_c1, lsk_c2;
+ struct tcp_counters lsk_c1, lsk_c2;
ssize_t bytes;
int sk, lsk;
synchronize_threads(); /* 1: key collection initialized */
lsk = key_collection_socket(true, port);
- if (test_get_tcp_ao_counters(lsk, &lsk_c1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &lsk_c1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 2: MKTs added => connect() */
if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
test_error("test_wait_fd()");
@@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
sk = accept(lsk, NULL, NULL);
if (sk < 0)
test_error("accept()");
- if (test_get_tcp_ao_counters(sk, begin))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, begin))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 3: accepted => send data */
- if (test_get_tcp_ao_counters(lsk, &lsk_c2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &lsk_c2))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, lsk, true, true);
close(lsk);
@@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota,
}
static void end_server(const char *tst_name, int sk,
- struct tcp_ao_counters *begin)
+ struct tcp_counters *begin)
{
- struct tcp_ao_counters end;
+ struct tcp_counters end;
- if (test_get_tcp_ao_counters(sk, &end))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &end))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, sk, false, true);
synchronize_threads(); /* 4: verified => closed */
@@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk,
static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_server(tst_name, port, quota, &tmp,
@@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
size_t quota, unsigned int rotations,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
unsigned int i;
int sk;
@@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port,
static int run_client(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index,
- struct tcp_ao_counters *before,
+ struct tcp_counters *before,
const size_t msg_sz, const size_t msg_nr)
{
int sk;
@@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port,
if (test_set_key(sk, sndid, rcvid))
test_error("failed to set current/rnext keys");
}
- if (before && test_get_tcp_ao_counters(sk, before))
- test_error("test_get_tcp_ao_counters()");
+ if (before && test_get_tcp_counters(sk, before))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 2: MKTs added => connect() */
if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
@@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port,
collection.keys[rnext_index].used_on_server_tx = 1;
synchronize_threads(); /* 3: accepted => send data */
- if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_sz, msg_nr)) {
test_fail("verify failed");
close(sk);
if (before)
- test_tcp_ao_counters_free(before);
+ test_tcp_counters_free(before);
return -1;
}
@@ -931,7 +931,7 @@ static int run_client(const char *tst_name, unsigned int port,
static int start_client(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index,
- struct tcp_ao_counters *before,
+ struct tcp_counters *before,
const size_t msg_sz, const size_t msg_nr)
{
if (init_default_key_collection(nr_keys, true))
@@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port,
static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
int current_index, int rnext_index,
- struct tcp_ao_counters *start)
+ struct tcp_counters *start)
{
- struct tcp_ao_counters end;
+ struct tcp_counters end;
/* Some application may become dependent on this kernel choice */
if (current_index < 0)
@@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
verify_current_rnext(tst_name, sk,
collection.keys[current_index].client_keyid,
collection.keys[rnext_index].server_keyid);
- if (start && test_get_tcp_ao_counters(sk, &end))
- test_error("test_get_tcp_ao_counters()");
+ if (start && test_get_tcp_counters(sk, &end))
+ test_error("test_get_tcp_counters()");
verify_keys(tst_name, sk, false, false);
synchronize_threads(); /* 4: verify => closed */
close(sk);
@@ -1016,7 +1016,7 @@ static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port)
trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest,
-1, port, 0, -1, -1, -1, -1, -1,
-1, key->server_keyid, -1);
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("verify failed");
*rnext_index = i;
}
@@ -1048,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
unsigned int current_index, unsigned int rnext_index,
unsigned int rotate_to_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
@@ -1061,7 +1061,7 @@ static void check_current_back(const char *tst_name, unsigned int port,
port, -1, 0, -1, -1, -1, -1, -1,
collection.keys[rotate_to_index].client_keyid,
collection.keys[current_index].client_keyid, -1);
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("verify failed");
/* There is a race here: between setting the current_key with
* setsockopt(TCP_AO_INFO) and starting to send some data - there
@@ -1081,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
unsigned int nr_keys, unsigned int rotations,
unsigned int current_index, unsigned int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
unsigned int i;
int sk;
@@ -1099,10 +1099,10 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
collection.keys[i].server_keyid, -1);
if (test_set_key(sk, -1, collection.keys[i].server_keyid))
test_error("Can't change the Rnext key");
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_len, nr_packets)) {
test_fail("verify failed");
close(sk);
- test_tcp_ao_counters_free(&tmp);
+ test_tcp_counters_free(&tmp);
return;
}
verify_current_rnext(tst_name, sk, -1,
@@ -1116,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port,
static void try_client_run(const char *tst_name, unsigned int port,
unsigned int nr_keys, int current_index, int rnext_index)
{
- struct tcp_ao_counters tmp;
+ struct tcp_counters tmp;
int sk;
sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
index 5db2f65cddc4..ebb2899c12fe 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -289,7 +289,7 @@ extern int link_set_up(const char *intf);
extern const unsigned int test_server_port;
extern int test_wait_fd(int sk, time_t sec, bool write);
extern int __test_connect_socket(int sk, const char *device,
- void *addr, size_t addr_sz, time_t timeout);
+ void *addr, size_t addr_sz, bool async);
extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
static inline int test_listen_socket(const union tcp_addr taddr,
@@ -331,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr,
* If set to 0 - kernel will try to retransmit SYN number of times, set in
* /proc/sys/net/ipv4/tcp_syn_retries
* By default set to 1 to make tests pass faster on non-busy machine.
+ * [in process of removal, don't use in new tests]
*/
#ifndef TEST_RETRANSMIT_SEC
#define TEST_RETRANSMIT_SEC 1
#endif
static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
- unsigned int port, time_t timeout)
+ unsigned int port, bool async)
{
sockaddr_af addr;
tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
return __test_connect_socket(sk, veth_name,
- (void *)&addr, sizeof(addr), timeout);
+ (void *)&addr, sizeof(addr), async);
}
static inline int test_connect_socket(int sk, const union tcp_addr taddr,
unsigned int port)
{
- return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+ return _test_connect_socket(sk, taddr, port, false);
}
extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
@@ -483,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
}
extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
-extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
- const size_t msg_len, time_t timeout_sec);
-extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
- time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr);
struct tcp_ao_key_counters {
uint8_t sndid;
@@ -512,7 +510,15 @@ struct tcp_ao_counters {
size_t nr_keys;
struct tcp_ao_key_counters *key_cnts;
};
-extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+struct tcp_counters {
+ struct tcp_ao_counters ao;
+ uint64_t netns_md5_notfound;
+ uint64_t netns_md5_unexpected;
+ uint64_t netns_md5_failure;
+};
+
+extern int test_get_tcp_counters(int sk, struct tcp_counters *out);
#define TEST_CNT_KEY_GOOD BIT(0)
#define TEST_CNT_KEY_BAD BIT(1)
@@ -526,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9)
#define TEST_CNT_NS_AO_REQUIRED BIT(10)
#define TEST_CNT_NS_DROPPED_ICMP BIT(11)
+#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12)
+#define TEST_CNT_NS_MD5_UNEXPECTED BIT(13)
+#define TEST_CNT_NS_MD5_FAILURE BIT(14)
typedef uint16_t test_cnt;
+#define _for_each_counter(f) \
+do { \
+ /* per-netns */ \
+ f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \
+ f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \
+ f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \
+ f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \
+ f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \
+ /* per-socket */ \
+ f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \
+ f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \
+ f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \
+ f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \
+ f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \
+ /* non-AO */ \
+ f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \
+ f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \
+ f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \
+} while (0)
+
#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \
@@ -539,34 +568,71 @@ typedef uint16_t test_cnt;
#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
-extern int __test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+extern test_cnt test_cmp_counters(struct tcp_counters *before,
+ struct tcp_counters *after);
+extern int test_assert_counters_sk(const char *tst_name,
+ struct tcp_counters *before, struct tcp_counters *after,
test_cnt expected);
-extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+extern int test_assert_counters_key(const char *tst_name,
struct tcp_ao_counters *before, struct tcp_ao_counters *after,
test_cnt expected, int sndid, int rcvid);
-extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+extern void test_tcp_counters_free(struct tcp_counters *cnts);
+
+/*
+ * Polling for netns and socket counters during select()/connect() and also
+ * client/server messaging. Instead of constant timeout on underlying select(),
+ * check the counters and return early. This allows to pass the tests where
+ * timeout is expected without waiting for that fixing timeout (tests speed-up).
+ * Previously shorter timeouts were used for tests expecting to time out,
+ * but that leaded to sporadic false positives on counter checks failures,
+ * as one second timeouts aren't enough for TCP retransmit.
+ *
+ * Two sides of the socketpair (client/server) should synchronize failures
+ * using a shared variable *err, so that they can detect the other side's
+ * failure.
+ */
+extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond,
+ volatile int *err);
+extern int _test_skpair_connect_poll(int sk, const char *device,
+ void *addr, size_t addr_sz,
+ test_cnt cond, volatile int *err);
+static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr,
+ unsigned int port,
+ test_cnt cond, volatile int *err)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return _test_skpair_connect_poll(sk, veth_name,
+ (void *)&addr, sizeof(addr), cond, err);
+}
+
+extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+ test_cnt cond, volatile int *err);
+extern int test_skpair_server(int sk, ssize_t quota,
+ test_cnt cond, volatile int *err);
+
/*
- * Frees buffers allocated in test_get_tcp_ao_counters().
+ * Frees buffers allocated in test_get_tcp_counters().
* The function doesn't expect new keys or keys removed between calls
- * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * to test_get_tcp_counters(). Check key counters manually if they
* may change.
*/
-static inline int test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected)
+static inline int test_assert_counters(const char *tst_name,
+ struct tcp_counters *before,
+ struct tcp_counters *after,
+ test_cnt expected)
{
int ret;
- ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+ ret = test_assert_counters_sk(tst_name, before, after, expected);
if (ret)
goto out;
- ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
- expected, -1, -1);
+ ret = test_assert_counters_key(tst_name, &before->ao, &after->ao,
+ expected, -1, -1);
out:
- test_tcp_ao_counters_free(before);
- test_tcp_ao_counters_free(after);
+ test_tcp_counters_free(before);
+ test_tcp_counters_free(after);
return ret;
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
index 24380c68fec6..27403f875054 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c
@@ -427,11 +427,8 @@ static void dump_trace_event(struct expected_trace_point *e)
test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu",
trace_event_names[e->type],
src, e->src_port, dst, e->dst_port, e->L3index,
- (e->fin > 0) ? "F" : (e->fin == 0) ? "!F" : "",
- (e->syn > 0) ? "S" : (e->syn == 0) ? "!S" : "",
- (e->rst > 0) ? "R" : (e->rst == 0) ? "!R" : "",
- (e->psh > 0) ? "P" : (e->psh == 0) ? "!P" : "",
- (e->ack > 0) ? "." : (e->ack == 0) ? "!." : "",
+ e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "",
+ e->psh ? "P" : "", e->ack ? "." : "",
e->keyid, e->rnext, e->maclen, e->sne, e->matched);
}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
index a27cc03c9fbd..49aec2922a31 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/setup.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -9,7 +9,7 @@
* Can't be included in the header: it defines static variables which
* will be unique to every object. Let's include it only once here.
*/
-#include "../../../kselftest.h"
+#include "kselftest.h"
/* Prevent overriding of one thread's output by another */
static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
index 0ffda966c677..ef8e9031d47a 100644
--- a/tools/testing/selftests/net/tcp_ao/lib/sock.c
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
return sk;
}
-int test_wait_fd(int sk, time_t sec, bool write)
+static int __test_wait_fd(int sk, struct timeval *tv, bool write)
{
- struct timeval tv = { .tv_sec = sec };
- struct timeval *ptv = NULL;
fd_set fds, efds;
int ret;
socklen_t slen = sizeof(ret);
@@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write)
FD_ZERO(&efds);
FD_SET(sk, &efds);
- if (sec)
- ptv = &tv;
-
errno = 0;
if (write)
- ret = select(sk + 1, NULL, &fds, &efds, ptv);
+ ret = select(sk + 1, NULL, &fds, &efds, tv);
else
- ret = select(sk + 1, &fds, NULL, &efds, ptv);
+ ret = select(sk + 1, &fds, NULL, &efds, tv);
if (ret < 0)
return -errno;
if (ret == 0) {
@@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write)
return 0;
}
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+ struct timeval tv = { .tv_sec = sec, };
+
+ return __test_wait_fd(sk, sec ? &tv : NULL, write);
+}
+
+static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c,
+ test_cnt condition)
+{
+ struct tcp_counters c2;
+ test_cnt diff;
+
+ if (test_get_tcp_counters(sk, &c2))
+ test_error("test_get_tcp_counters()");
+
+ diff = test_cmp_counters(c, &c2);
+ test_tcp_counters_free(&c2);
+ return (diff & condition) == condition;
+}
+
+/* How often wake up and check netns counters & paired (*err) */
+#define POLL_USEC 150
+static int __test_skpair_poll(int sk, bool write, uint64_t timeout,
+ struct tcp_counters *c, test_cnt cond,
+ volatile int *err)
+{
+ uint64_t t;
+
+ for (t = 0; t <= timeout * 1000000; t += POLL_USEC) {
+ struct timeval tv = { .tv_usec = POLL_USEC, };
+ int ret;
+
+ ret = __test_wait_fd(sk, &tv, write);
+ if (ret != -ETIMEDOUT)
+ return ret;
+ if (c && cond && __skpair_poll_should_stop(sk, c, cond))
+ break;
+ if (err && *err)
+ return *err;
+ }
+ if (err)
+ *err = -ETIMEDOUT;
+ return -ETIMEDOUT;
+}
+
int __test_connect_socket(int sk, const char *device,
- void *addr, size_t addr_sz, time_t timeout)
+ void *addr, size_t addr_sz, bool async)
{
long flags;
int err;
@@ -82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device,
test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
}
- if (!timeout) {
- err = connect(sk, addr, addr_sz);
- if (err) {
- err = -errno;
- goto out;
- }
- return 0;
- }
-
flags = fcntl(sk, F_GETFL);
if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
test_error("fcntl()");
@@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device,
err = -errno;
goto out;
}
- if (timeout < 0)
+ if (async)
return sk;
- err = test_wait_fd(sk, timeout, 1);
+ err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1);
if (err)
goto out;
}
@@ -113,6 +145,45 @@ out:
return err;
}
+int test_skpair_wait_poll(int sk, bool write,
+ test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ int ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
+int _test_skpair_connect_poll(int sk, const char *device,
+ void *addr, size_t addr_sz,
+ test_cnt condition, volatile int *err)
+{
+ struct tcp_counters c;
+ int ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+ ret = __test_connect_socket(sk, device, addr, addr_sz, true);
+ if (ret < 0) {
+ test_tcp_counters_free(&c);
+ return (*err = ret);
+ }
+ ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err);
+ if (ret < 0)
+ close(sk);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
int vrf, const char *password)
{
@@ -333,12 +404,12 @@ do { \
return 0;
}
-int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+int test_get_tcp_counters(int sk, struct tcp_counters *out)
{
struct tcp_ao_getsockopt *key_dump;
socklen_t key_dump_sz = sizeof(*key_dump);
struct tcp_ao_info_opt info = {};
- bool c1, c2, c3, c4, c5;
+ bool c1, c2, c3, c4, c5, c6, c7, c8;
struct netstat *ns;
int err, nr_keys;
@@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
/* per-netns */
ns = netstat_read();
- out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
- out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
- out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
- out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
- out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+ out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+ out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+ out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+ out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6);
+ out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7);
+ out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8);
netstat_free(ns);
- if (c1 || c2 || c3 || c4 || c5)
+ if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8)
return -EOPNOTSUPP;
err = test_get_ao_info(sk, &info);
+ if (err == -ENOENT)
+ return 0;
if (err)
return err;
/* per-socket */
- out->ao_info_pkt_good = info.pkt_good;
- out->ao_info_pkt_bad = info.pkt_bad;
- out->ao_info_pkt_key_not_found = info.pkt_key_not_found;
- out->ao_info_pkt_ao_required = info.pkt_ao_required;
- out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
+ out->ao.ao_info_pkt_good = info.pkt_good;
+ out->ao.ao_info_pkt_bad = info.pkt_bad;
+ out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found;
+ out->ao.ao_info_pkt_ao_required = info.pkt_ao_required;
+ out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
/* per-key */
nr_keys = test_get_ao_keys_nr(sk);
@@ -372,7 +448,7 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
return nr_keys;
if (nr_keys == 0)
test_error("test_get_ao_keys_nr() == 0");
- out->nr_keys = (size_t)nr_keys;
+ out->ao.nr_keys = (size_t)nr_keys;
key_dump = calloc(nr_keys, key_dump_sz);
if (!key_dump)
return -errno;
@@ -386,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
return -errno;
}
- out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
- if (!out->key_cnts) {
+ out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0]));
+ if (!out->ao.key_cnts) {
free(key_dump);
return -errno;
}
while (nr_keys--) {
- out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
- out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
- out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
- out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+ out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+ out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+ out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+ out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
}
free(key_dump);
return 0;
}
-int __test_tcp_ao_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected)
+test_cnt test_cmp_counters(struct tcp_counters *before,
+ struct tcp_counters *after)
+{
+#define __cmp(cnt, e_cnt) \
+do { \
+ if (before->cnt > after->cnt) \
+ test_error("counter " __stringify(cnt) " decreased"); \
+ if (before->cnt != after->cnt) \
+ ret |= e_cnt; \
+} while (0)
+
+ test_cnt ret = 0;
+ size_t i;
+
+ if (before->ao.nr_keys != after->ao.nr_keys)
+ test_error("the number of keys has changed");
+
+ _for_each_counter(__cmp);
+
+ i = before->ao.nr_keys;
+ while (i--) {
+ __cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD);
+ __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD);
+ }
+#undef __cmp
+ return ret;
+}
+
+int test_assert_counters_sk(const char *tst_name,
+ struct tcp_counters *before,
+ struct tcp_counters *after,
+ test_cnt expected)
{
-#define __cmp_ao(cnt, expecting_inc) \
+#define __cmp_ao(cnt, e_cnt) \
do { \
if (before->cnt > after->cnt) { \
test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
- tst_name ?: "", before->cnt, after->cnt); \
+ tst_name ?: "", before->cnt, after->cnt); \
return -1; \
} \
- if ((before->cnt != after->cnt) != (expecting_inc)) { \
+ if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \
test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
- tst_name ?: "", (expecting_inc) ? "" : "not ", \
+ tst_name ?: "", (expected & e_cnt) ? "" : "not ", \
before->cnt, after->cnt); \
return -1; \
} \
-} while(0)
+} while (0)
errno = 0;
- /* per-netns */
- __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
- __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
- __cmp_ao(netns_ao_key_not_found,
- !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
- __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
- __cmp_ao(netns_ao_dropped_icmp,
- !!(expected & TEST_CNT_NS_DROPPED_ICMP));
- /* per-socket */
- __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
- __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
- __cmp_ao(ao_info_pkt_key_not_found,
- !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
- __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
- __cmp_ao(ao_info_pkt_dropped_icmp,
- !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+ _for_each_counter(__cmp_ao);
return 0;
#undef __cmp_ao
}
-int test_tcp_ao_key_counters_cmp(const char *tst_name,
- struct tcp_ao_counters *before,
- struct tcp_ao_counters *after,
- test_cnt expected,
- int sndid, int rcvid)
+int test_assert_counters_key(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected, int sndid, int rcvid)
{
size_t i;
-#define __cmp_ao(i, cnt, expecting_inc) \
+#define __cmp_ao(i, cnt, e_cnt) \
do { \
if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \
test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
@@ -461,16 +549,16 @@ do { \
before->key_cnts[i].rcvid); \
return -1; \
} \
- if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \
+ if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \
test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
- tst_name ?: "", (expecting_inc) ? "" : "not ",\
+ tst_name ?: "", (expected & e_cnt) ? "" : "not ",\
before->key_cnts[i].cnt, \
after->key_cnts[i].cnt, \
before->key_cnts[i].sndid, \
before->key_cnts[i].rcvid); \
return -1; \
} \
-} while(0)
+} while (0)
if (before->nr_keys != after->nr_keys) {
test_fail("%s: Keys changed on the socket %zu != %zu",
@@ -485,20 +573,22 @@ do { \
continue;
if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
continue;
- __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
- __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+ __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD);
+ __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD);
}
return 0;
#undef __cmp_ao
}
-void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+void test_tcp_counters_free(struct tcp_counters *cnts)
{
- free(cnts->key_cnts);
+ free(cnts->ao.key_cnts);
}
#define TEST_BUF_SIZE 4096
-ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c,
+ test_cnt cond, volatile int *err,
+ time_t timeout_sec)
{
ssize_t total = 0;
@@ -507,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
ssize_t bytes, sent;
int ret;
- ret = test_wait_fd(sk, timeout_sec, 0);
+ ret = __test_skpair_poll(sk, 0, timeout_sec, c, cond, err);
if (ret)
return ret;
@@ -518,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
if (bytes == 0)
break;
- ret = test_wait_fd(sk, timeout_sec, 1);
+ ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err);
if (ret)
return ret;
@@ -533,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
return total;
}
-ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
- const size_t msg_len, time_t timeout_sec)
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+ return _test_server_run(sk, quota, NULL, 0, NULL,
+ timeout_sec ?: TEST_TIMEOUT_SEC);
+}
+
+int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ ssize_t ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC);
+ test_tcp_counters_free(&c);
+ return ret;
+}
+
+static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len,
+ struct tcp_counters *c, test_cnt cond,
+ volatile int *err)
{
char msg[msg_len];
int nodelay = 1;
+ char *buf;
size_t i;
+ buf = alloca(buf_sz);
+ if (!buf)
+ return -ENOMEM;
+ randomize_buffer(buf, buf_sz);
+
if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
test_error("setsockopt(TCP_NODELAY)");
@@ -547,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
size_t sent, bytes = min(msg_len, buf_sz - i);
int ret;
- ret = test_wait_fd(sk, timeout_sec, 1);
+ ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err);
if (ret)
return ret;
@@ -561,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
do {
ssize_t got;
- ret = test_wait_fd(sk, timeout_sec, 0);
+ ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC,
+ c, cond, err);
if (ret)
return ret;
@@ -580,15 +699,31 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
return i;
}
-int test_client_verify(int sk, const size_t msg_len, const size_t nr,
- time_t timeout_sec)
+int test_client_verify(int sk, const size_t msg_len, const size_t nr)
{
size_t buf_sz = msg_len * nr;
- char *buf = alloca(buf_sz);
ssize_t ret;
- randomize_buffer(buf, buf_sz);
- ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+ ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL);
+ if (ret < 0)
+ return (int)ret;
+ return ret != buf_sz ? -1 : 0;
+}
+
+int test_skpair_client(int sk, const size_t msg_len, const size_t nr,
+ test_cnt cond, volatile int *err)
+{
+ struct tcp_counters c;
+ size_t buf_sz = msg_len * nr;
+ ssize_t ret;
+
+ *err = 0;
+ if (test_get_tcp_counters(sk, &c))
+ test_error("test_get_tcp_counters()");
+ synchronize_threads(); /* 1: init skpair & read nscounters */
+
+ ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err);
+ test_tcp_counters_free(&c);
if (ret < 0)
return (int)ret;
return ret != buf_sz ? -1 : 0;
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
index ecc6f1e3a414..9a059b6c4523 100644
--- a/tools/testing/selftests/net/tcp_ao/restore.c
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len;
static void try_server_run(const char *tst_name, unsigned int port,
fault_t inj, test_cnt cnt_expected)
{
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
const char *cnt_name = "TCPAOGood";
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt, after_cnt;
- int sk, lsk;
- time_t timeout;
+ int sk, lsk, dummy;
ssize_t bytes;
if (fault(TIMEOUT))
@@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port,
}
before_cnt = netstat_get_one(cnt_name, NULL);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- bytes = test_server_run(sk, quota, timeout);
+ bytes = test_skpair_server(sk, quota, poll_cnt, &dummy);
if (fault(TIMEOUT)) {
if (bytes > 0)
test_fail("%s: server served: %zd", tst_name, bytes);
@@ -65,17 +64,17 @@ static void try_server_run(const char *tst_name, unsigned int port,
test_ok("%s: server alive", tst_name);
}
synchronize_threads(); /* 3: counters checks */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
- test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (after_cnt <= before_cnt) {
- test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
- tst_name, cnt_name, after_cnt, before_cnt);
+ test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64,
+ tst_name, cnt_name, after_cnt, before_cnt);
} else {
- test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64,
+ test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64,
tst_name, cnt_name, before_cnt, after_cnt);
}
@@ -92,16 +91,16 @@ static void *server_fn(void *arg)
{
unsigned int port = test_server_port;
- try_server_run("TCP-AO migrate to another socket", port++,
+ try_server_run("TCP-AO migrate to another socket (server)", port++,
0, TEST_CNT_GOOD);
- try_server_run("TCP-AO with wrong send ISN", port++,
+ try_server_run("TCP-AO with wrong send ISN (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong receive ISN", port++,
+ try_server_run("TCP-AO with wrong receive ISN (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+ try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++,
FAULT_TIMEOUT, TEST_CNT_BAD);
- try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
- FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+ try_server_run("TCP-AO with wrong receive SEQ ext number (server)",
+ port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
synchronize_threads(); /* don't race to exit: client exits */
return NULL;
@@ -125,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("pre-migrate verify failed");
test_enable_repair(sk);
@@ -139,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
struct tcp_ao_repair *ao_img,
fault_t inj, test_cnt cnt_expected)
{
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
const char *cnt_name = "TCPAOGood";
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt, after_cnt;
- time_t timeout;
- int sk;
+ int sk, dummy;
if (fault(TIMEOUT))
cnt_name = "TCPAOBad";
@@ -159,30 +158,30 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port,
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, ao_img);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(img);
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+ if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) {
if (fault(TIMEOUT))
test_ok("%s: post-migrate connection is broken", tst_name);
else
test_fail("%s: post-migrate connection is working", tst_name);
} else {
if (fault(TIMEOUT))
- test_fail("%s: post-migrate connection still working", tst_name);
+ test_fail("%s: post-migrate connection is working", tst_name);
else
test_ok("%s: post-migrate connection is alive", tst_name);
}
+
synchronize_threads(); /* 3: counters checks */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_cnt = netstat_get_one(cnt_name, NULL);
- test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
if (after_cnt <= before_cnt) {
test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64,
@@ -203,7 +202,7 @@ static void *client_fn(void *arg)
sockaddr_af saddr;
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
- test_sk_restore("TCP-AO migrate to another socket", port++,
+ test_sk_restore("TCP-AO migrate to another socket (client)", port++,
&saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -212,7 +211,7 @@ static void *client_fn(void *arg)
-1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
- test_sk_restore("TCP-AO with wrong send ISN", port++,
+ test_sk_restore("TCP-AO with wrong send ISN (client)", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -221,7 +220,7 @@ static void *client_fn(void *arg)
-1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
- test_sk_restore("TCP-AO with wrong receive ISN", port++,
+ test_sk_restore("TCP-AO with wrong receive ISN (client)", port++,
&saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -229,8 +228,8 @@ static void *client_fn(void *arg)
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest,
-1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1);
/* not expecting server => client mismatches as only snd sne is broken */
- test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
- &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ test_sk_restore("TCP-AO with wrong send SEQ ext number (client)",
+ port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_BAD | TEST_CNT_GOOD);
test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
@@ -238,8 +237,8 @@ static void *client_fn(void *arg)
/* not expecting client => server mismatches as only rcv sne is broken */
trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr,
port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1);
- test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
- &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)",
+ port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
TEST_CNT_NS_GOOD | TEST_CNT_BAD);
return NULL;
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
index 6364facaa63e..883cddf377cf 100644
--- a/tools/testing/selftests/net/tcp_ao/rst.c
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -84,15 +84,15 @@ static void close_forced(int sk)
static void test_server_active_rst(unsigned int port)
{
- struct tcp_ao_counters cnt1, cnt2;
+ struct tcp_counters cnt1, cnt2;
ssize_t bytes;
int sk, lsk;
lsk = test_listen_socket(this_ip_addr, port, backlog);
if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
test_error("setsockopt(TCP_AO_ADD_KEY)");
- if (test_get_tcp_ao_counters(lsk, &cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 1: MKT added */
if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
@@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port)
test_error("accept()");
synchronize_threads(); /* 2: connection accept()ed, another queued */
- if (test_get_tcp_ao_counters(lsk, &cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 3: close listen socket */
close(lsk);
@@ -120,7 +120,7 @@ static void test_server_active_rst(unsigned int port)
synchronize_threads(); /* 5: closed active sk */
synchronize_threads(); /* 6: counters checks */
- if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+ if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
test_fail("MKT counters (server) have not only good packets");
else
test_ok("MKT counters are good on server");
@@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port)
static void test_server_passive_rst(unsigned int port)
{
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
int sk, lsk;
ssize_t bytes;
@@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port)
synchronize_threads(); /* 2: accepted => send data */
close(lsk);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
if (bytes != quota) {
@@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port)
synchronize_threads(); /* 3: checkpoint the client */
synchronize_threads(); /* 4: close the server, creating twsk */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(sk);
synchronize_threads(); /* 5: restore the socket, send more data */
- test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD);
synchronize_threads(); /* 6: server exits */
}
@@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port)
synchronize_threads(); /* 1: MKT added */
for (i = 0; i < last; i++) {
- err = _test_connect_socket(sk[i], this_ip_dest, port,
- (i == 0) ? TEST_TIMEOUT_SEC : -1);
+ err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0);
if (err < 0)
test_error("failed to connect()");
}
@@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port)
test_error("test_wait_fds(): %d", err);
/* async connect() with third sk to get into request_sock_queue */
- err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ err = _test_connect_socket(sk[last], this_ip_dest, port, 1);
if (err < 0)
test_error("failed to connect()");
synchronize_threads(); /* 3: close listen socket */
- if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk[0], packet_sz, quota / packet_sz))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port)
static void test_client_passive_rst(unsigned int port)
{
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_ao_repair ao_img;
struct tcp_sock_state img;
sockaddr_af saddr;
@@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, packet_sz, quota / packet_sz))
test_fail("Failed to send data on connected socket");
else
test_ok("Verified established tcp connection");
@@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port)
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, &ao_img);
- if (test_get_tcp_ao_counters(sk, &ao1))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt1))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(&img);
@@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port)
* IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
* options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
*/
- err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+ err = test_client_verify(sk, packet_sz, quota / packet_sz);
/* Make sure that the connection was reset, not timeouted */
if (err && err == -ECONNRESET)
test_ok("client sock was passively reset post-seq-adjust");
@@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port)
else
test_fail("client sock is yet connected post-seq-adjust");
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* 6: server exits */
close(sk);
- test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD);
}
static void *client_fn(void *arg)
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index 3ecd2b58de6a..2c73bea698a6 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf,
if (link_set_up(lo_intf))
test_error("Failed to bring %s up", lo_intf);
+
+ if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr))
+ test_error("Failed to add a local route %s", lo_intf);
}
static void setup_lo_intf(const char *lo_intf)
@@ -30,7 +33,7 @@ static void setup_lo_intf(const char *lo_intf)
static void tcp_self_connect(const char *tst, unsigned int port,
bool different_keyids, bool check_restore)
{
- struct tcp_ao_counters before_ao, after_ao;
+ struct tcp_counters before, after;
uint64_t before_aogood, after_aogood;
struct netstat *ns_before, *ns_after;
const size_t nr_packets = 20;
@@ -60,17 +63,17 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &before_ao))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &before))
+ test_error("test_get_tcp_counters()");
if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
- sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+ sizeof(addr), 0) < 0) {
ns_after = netstat_read();
netstat_print_diff(ns_before, ns_after);
test_error("failed to connect()");
}
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("%s: tcp connection verify failed", tst);
close(sk);
return;
@@ -78,8 +81,8 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- if (test_get_tcp_ao_counters(sk, &after_ao))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &after))
+ test_error("test_get_tcp_counters()");
if (!check_restore) {
/* to debug: netstat_print_diff(ns_before, ns_after); */
netstat_free(ns_before);
@@ -93,7 +96,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
return;
}
- if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+ if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) {
close(sk);
return;
}
@@ -136,7 +139,7 @@ static void tcp_self_connect(const char *tst, unsigned int port,
test_ao_restore(sk, &ao_img);
test_disable_repair(sk);
test_sock_state_free(&img);
- if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, 100, nr_packets)) {
test_fail("%s: tcp connection verify failed", tst);
close(sk);
return;
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
index 8901a6785dc8..6478da6a71c3 100644
--- a/tools/testing/selftests/net/tcp_ao/seq-ext.c
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Check that after SEQ number wrap-around:
* 1. SEQ-extension has upper bytes set
- * 2. TCP conneciton is alive and no TCPAOBad segments
+ * 2. TCP connection is alive and no TCPAOBad segments
* In order to test (2), the test doesn't just adjust seq number for a queue
* on a connected socket, but migrates it to another sk+port number, so
* that there won't be any delayed packets that will fail to verify
@@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img,
static int test_sk_restore(struct tcp_sock_state *img,
struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
const union tcp_addr daddr, unsigned int dport,
- struct tcp_ao_counters *cnt)
+ struct tcp_counters *cnt)
{
int sk;
@@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img,
test_error("setsockopt(TCP_AO_ADD_KEY)");
test_ao_restore(sk, ao_img);
- if (test_get_tcp_ao_counters(sk, cnt))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, cnt))
+ test_error("test_get_tcp_counters()");
test_disable_repair(sk);
test_sock_state_free(img);
@@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img,
static void *server_fn(void *arg)
{
uint64_t before_good, after_good, after_bad;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_sock_state img;
struct tcp_ao_repair ao_img;
sockaddr_af saddr;
@@ -114,7 +114,7 @@ static void *server_fn(void *arg)
test_adjust_seqs(&img, &ao_img, true);
synchronize_threads(); /* 4: dump finished */
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
- client_new_port, &ao1);
+ client_new_port, &cnt1);
trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr,
this_ip_dest, test_server_port + 1, client_new_port, 1);
@@ -136,11 +136,11 @@ static void *server_fn(void *arg)
}
synchronize_threads(); /* 6: verify counters after SEQ-number rollover */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
- test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
if (after_good <= before_good) {
test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
@@ -173,7 +173,7 @@ out:
static void *client_fn(void *arg)
{
uint64_t before_good, after_good, after_bad;
- struct tcp_ao_counters ao1, ao2;
+ struct tcp_counters cnt1, cnt2;
struct tcp_sock_state img;
struct tcp_ao_repair ao_img;
sockaddr_af saddr;
@@ -191,7 +191,7 @@ static void *client_fn(void *arg)
test_error("failed to connect()");
synchronize_threads(); /* 2: accepted => send data */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ if (test_client_verify(sk, msg_len, nr_packets)) {
test_fail("pre-migrate verify failed");
return NULL;
}
@@ -213,20 +213,20 @@ static void *client_fn(void *arg)
test_adjust_seqs(&img, &ao_img, false);
synchronize_threads(); /* 4: dump finished */
sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
- test_server_port + 1, &ao1);
+ test_server_port + 1, &cnt1);
synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */
- if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ if (test_client_verify(sk, msg_len, nr_packets))
test_fail("post-migrate verify failed");
else
test_ok("post-migrate connection alive");
synchronize_threads(); /* 5: verify counters after SEQ-number rollover */
- if (test_get_tcp_ao_counters(sk, &ao2))
- test_error("test_get_tcp_ao_counters()");
+ if (test_get_tcp_counters(sk, &cnt2))
+ test_error("test_get_tcp_counters()");
after_good = netstat_get_one("TCPAOGood", NULL);
- test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+ test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD);
if (after_good <= before_good) {
test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64,
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
index f779e5892bc1..a1467b64390a 100644
--- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -6,6 +6,7 @@
#define fault(type) (inj == FAULT_ ## type)
static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
static const char *ao_password = DEFAULT_TEST_PASSWORD;
+static volatile int sk_pair;
static union tcp_addr client2;
static union tcp_addr client3;
@@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port,
const char *cnt_name, test_cnt cnt_expected,
int needs_tcp_md5, fault_t inj)
{
- struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ struct tcp_counters cnt1, cnt2;
uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
- int lsk, err, sk = 0;
- time_t timeout;
+ test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected;
+ int lsk, err, sk = -1;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
return;
@@ -63,22 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port,
if (cnt_name)
before_cnt = netstat_get_one(cnt_name, NULL);
- if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
- test_error("test_get_tcp_ao_counters()");
+ if (ao_addr && test_get_tcp_counters(lsk, &cnt1))
+ test_error("test_get_tcp_counters()");
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- err = test_wait_fd(lsk, timeout, 0);
+ err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair);
synchronize_threads(); /* connect()/accept() timeouts */
if (err == -ETIMEDOUT) {
+ sk_pair = err;
if (!fault(TIMEOUT))
- test_fail("timed out for accept()");
+ test_fail("%s: timed out for accept()", tst_name);
+ } else if (err == -EKEYREJECTED) {
+ if (!fault(KEYREJECT))
+ test_fail("%s: key was rejected", tst_name);
} else if (err < 0) {
- test_error("test_wait_fd()");
+ test_error("test_skpair_wait_poll()");
} else {
if (fault(TIMEOUT))
- test_fail("ready to accept");
+ test_fail("%s: ready to accept", tst_name);
sk = accept(lsk, NULL, NULL);
if (sk < 0) {
@@ -89,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port,
}
}
- if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
- test_error("test_get_tcp_ao_counters()");
+ if (ao_addr && test_get_tcp_counters(lsk, &cnt2))
+ test_error("test_get_tcp_counters()");
close(lsk);
if (!cnt_name) {
@@ -108,11 +112,11 @@ static void try_accept(const char *tst_name, unsigned int port,
tst_name, cnt_name, before_cnt, after_cnt);
}
if (ao_addr)
- test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected);
out:
synchronize_threads(); /* test_kill_sk() */
- if (sk > 0)
+ if (sk >= 0)
test_kill_sk(sk);
}
@@ -153,78 +157,82 @@ static void *server_fn(void *arg)
server_add_routes();
- try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
TEST_CNT_GOOD, 0, 0);
- try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+ try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
- 0, 1, FAULT_TIMEOUT);
- try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0,
&addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
- try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0,
&this_ip_dest, TEST_PREFIX, true,
100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
- try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
&this_ip_dest, TEST_PREFIX, true,
100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
- try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+ try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
- 0, 1, FAULT_TIMEOUT);
- try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT);
+ try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
- try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+ try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
- 0, 1, FAULT_TIMEOUT);
+ TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
- try_accept("no sign server: AO client", port++, NULL, 0,
+ try_accept("[server] no sign server: AO client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
- TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
- try_accept("no sign server: MD5 client", port++, NULL, 0,
+ TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+ try_accept("[server] no sign server: MD5 client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
- 0, 1, FAULT_TIMEOUT);
- try_accept("no sign server: no sign client", port++, NULL, 0,
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] no sign server: no sign client", port++, NULL, 0,
NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
- try_accept("AO+MD5 server: AO client (matching)", port++,
+ try_accept("[server] AO+MD5 server: AO client (matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
- try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+ try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+ try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: MD5 client (matching)", port++,
+ try_accept("[server] AO+MD5 server: MD5 client (matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, NULL, 0, 1, 0);
- try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+ try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+ 100, 100, 0, "TCPMD5Unexpected",
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+ 100, 100, 0, "TCPMD5Unexpected",
+ TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT);
+ try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++,
&this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "CurrEstab", 0, 1, 0);
- try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+ try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
100, 100, 0, "TCPAORequired",
TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+ 100, 100, 0, "TCPMD5NotFound",
+ TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ /* Key rejected by the other side, failing short through skpair */
+ try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
- try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
+ try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
- 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+ 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT);
server_add_fail_tests(&port);
@@ -259,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port,
uint8_t sndid, uint8_t rcvid, uint8_t vrf,
fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
{
- time_t timeout;
int sk, ret;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -281,11 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port,
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
-
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
synchronize_threads(); /* connect()/accept() timeouts */
if (ret < 0) {
+ sk_pair = ret;
if (fault(KEYREJECT) && ret == -EKEYREJECTED)
test_ok("%s: connect() was prevented", tst_name);
else if (ret == -ETIMEDOUT && fault(TIMEOUT))
@@ -305,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port,
out:
synchronize_threads(); /* test_kill_sk() */
- /* _test_connect_socket() cleans up on failure */
- if (ret > 0)
+ if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
test_kill_sk(sk);
}
@@ -437,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port,
int ao_vrf, uint8_t sndid, uint8_t rcvid,
int needs_tcp_md5, fault_t inj)
{
- time_t timeout;
int sk, ret;
if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
@@ -450,11 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port,
synchronize_threads(); /* preparations done */
- timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
- ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+ ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair);
synchronize_threads(); /* connect()/accept() timeouts */
- if (ret <= 0) {
+ if (ret < 0) {
test_error("%s: connect() returned %d", tst_name, ret);
goto out;
}
@@ -490,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port,
out:
synchronize_threads(); /* test_kill_sk() */
- /* _test_connect_socket() cleans up on failure */
- if (ret > 0)
+ if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */
test_kill_sk(sk);
}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index c1cb0c75156a..4b3f9b5e50fe 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,7 +26,7 @@
#include <fcntl.h>
#include <time.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33
diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c
new file mode 100644
index 000000000000..6146b62610df
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_port_share.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2025 Cloudflare, Inc.
+
+/* Tests for TCP port sharing (bind bucket reuse). */
+
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdlib.h>
+
+#include "kselftest_harness.h"
+
+#define DST_PORT 30000
+#define SRC_PORT 40000
+
+struct sockaddr_inet {
+ union {
+ struct sockaddr_storage ss;
+ struct sockaddr_in6 v6;
+ struct sockaddr_in v4;
+ struct sockaddr sa;
+ };
+ socklen_t len;
+ char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1];
+};
+
+const int one = 1;
+
+static int disconnect(int fd)
+{
+ return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr));
+}
+
+static int getsockname_port(int fd)
+{
+ struct sockaddr_inet addr = {};
+ int err;
+
+ addr.len = sizeof(addr);
+ err = getsockname(fd, &addr.sa, &addr.len);
+ if (err)
+ return -1;
+
+ switch (addr.sa.sa_family) {
+ case AF_INET:
+ return ntohs(addr.v4.sin_port);
+ case AF_INET6:
+ return ntohs(addr.v6.sin6_port);
+ default:
+ errno = EAFNOSUPPORT;
+ return -1;
+ }
+}
+
+static void make_inet_addr(int af, const char *ip, __u16 port,
+ struct sockaddr_inet *addr)
+{
+ const char *fmt = "";
+
+ memset(addr, 0, sizeof(*addr));
+
+ switch (af) {
+ case AF_INET:
+ addr->len = sizeof(addr->v4);
+ addr->v4.sin_family = af;
+ addr->v4.sin_port = htons(port);
+ inet_pton(af, ip, &addr->v4.sin_addr);
+ fmt = "%s:%hu";
+ break;
+ case AF_INET6:
+ addr->len = sizeof(addr->v6);
+ addr->v6.sin6_family = af;
+ addr->v6.sin6_port = htons(port);
+ inet_pton(af, ip, &addr->v6.sin6_addr);
+ fmt = "[%s]:%hu";
+ break;
+ }
+
+ snprintf(addr->str, sizeof(addr->str), fmt, ip, port);
+}
+
+FIXTURE(tcp_port_share) {};
+
+FIXTURE_VARIANT(tcp_port_share) {
+ int domain;
+ /* IP to listen on and connect to */
+ const char *dst_ip;
+ /* Primary IP to connect from */
+ const char *src1_ip;
+ /* Secondary IP to connect from */
+ const char *src2_ip;
+ /* IP to bind to in order to block the source port */
+ const char *bind_ip;
+};
+
+FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) {
+ .domain = AF_INET,
+ .dst_ip = "127.0.0.1",
+ .src1_ip = "127.1.1.1",
+ .src2_ip = "127.2.2.2",
+ .bind_ip = "127.3.3.3",
+};
+
+FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) {
+ .domain = AF_INET6,
+ .dst_ip = "::1",
+ .src1_ip = "2001:db8::1",
+ .src2_ip = "2001:db8::2",
+ .bind_ip = "2001:db8::3",
+};
+
+FIXTURE_SETUP(tcp_port_share)
+{
+ int sc;
+
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+ ASSERT_EQ(system("ip link set dev lo up"), 0);
+ ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0);
+ ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0);
+ ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0);
+
+ sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY);
+ ASSERT_GE(sc, 0);
+ ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0);
+ ASSERT_EQ(close(sc), 0);
+}
+
+FIXTURE_TEARDOWN(tcp_port_share) {}
+
+/* Verify that an ephemeral port becomes available again after the socket
+ * bound to it and blocking it from reuse is closed.
+ */
+TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ int c1, c2, ln, pb;
+
+ /* Listen on <dst_ip>:<DST_PORT> */
+ ln = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(ln, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+ ASSERT_EQ(listen(ln, 2), 0);
+
+ /* Connect from <src1_ip>:<SRC_PORT> */
+ c1 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c1, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src1_ip, 0, &addr);
+ ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+ ASSERT_EQ(getsockname_port(c1), SRC_PORT);
+
+ /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */
+ pb = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(pb, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+ ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */
+ c2 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c2, 0) TH_LOG("socket");
+ ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src2_ip, 0, &addr);
+ ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str);
+ ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m");
+
+ /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */
+ ASSERT_EQ(close(pb), 0);
+
+ /* Connect again from <src2_ip>:<SRC_PORT> */
+ EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+ EXPECT_EQ(getsockname_port(c2), SRC_PORT);
+
+ ASSERT_EQ(close(c2), 0);
+ ASSERT_EQ(close(c1), 0);
+ ASSERT_EQ(close(ln), 0);
+}
+
+/* Verify that a socket auto-bound during connect() blocks port reuse after
+ * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind().
+ */
+TEST_F(tcp_port_share, port_block_after_disconnect)
+{
+ const typeof(variant) v = variant;
+ struct sockaddr_inet addr;
+ int c1, c2, ln, pb;
+
+ /* Listen on <dst_ip>:<DST_PORT> */
+ ln = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(ln, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+ ASSERT_EQ(listen(ln, 2), 0);
+
+ /* Connect from <src1_ip>:<SRC_PORT> */
+ c1 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c1, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src1_ip, 0, &addr);
+ ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str);
+ ASSERT_EQ(getsockname_port(c1), SRC_PORT);
+
+ /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */
+ ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m");
+ ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+ ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ /* Trigger port-addr bucket state update with another bind() and close() */
+ pb = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(pb, 0) TH_LOG("socket(): %m");
+ ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr);
+ ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ ASSERT_EQ(close(pb), 0);
+
+ /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. */
+ c2 = socket(v->domain, SOCK_STREAM, 0);
+ ASSERT_GE(c2, 0) TH_LOG("socket: %m");
+ ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0);
+
+ make_inet_addr(v->domain, v->src2_ip, 0, &addr);
+ ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str);
+
+ make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr);
+ EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str);
+ EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m");
+
+ ASSERT_EQ(close(c2), 0);
+ ASSERT_EQ(close(c1), 0);
+ ASSERT_EQ(close(ln), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh
deleted file mode 100755
index 3119b80e711f..000000000000
--- a/tools/testing/selftests/net/test_blackhole_dev.sh
+++ /dev/null
@@ -1,11 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-# Runs blackhole-dev test using blackhole-dev kernel module
-
-if /sbin/modprobe -q test_blackhole_dev ; then
- /sbin/modprobe -q -r test_blackhole_dev;
- echo "test_blackhole_dev: ok";
-else
- echo "test_blackhole_dev: [FAIL]";
- exit 1;
-fi
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 1b3f89e2b86e..2a7224fe74f2 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -315,6 +315,29 @@ backup_port()
tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
+ # Check that packets are forwarded out of vx0 when swp1 is
+ # administratively down and out of swp1 when it is administratively up
+ # again.
+ run_cmd "ip -n $sw1 link set dev swp1 down"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 administratively down"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 up"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 administratively up"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+
# Remove vx0 as the backup port of swp1 and check that packets are no
# longer forwarded out of vx0 when swp1 does not have a carrier.
run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
@@ -322,9 +345,9 @@ backup_port()
log_test $? 1 "vx0 not configured as backup port of swp1"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets $sw1 "dev swp1 egress" 101 4
+ tc_check_packets $sw1 "dev swp1 egress" 101 5
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets $sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
run_cmd "ip -n $sw1 link set dev swp1 carrier off"
@@ -332,9 +355,9 @@ backup_port()
log_test $? 0 "swp1 carrier off"
run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets $sw1 "dev swp1 egress" 101 4
+ tc_check_packets $sw1 "dev swp1 egress" 101 5
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets $sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
}
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
# All tests in this script. Can be overridden with -t option.
TESTS="
neigh_suppress_arp
+ neigh_suppress_uc_arp
neigh_suppress_ns
+ neigh_suppress_uc_ns
neigh_vlan_suppress_arp
neigh_vlan_suppress_ns
"
@@ -388,6 +390,52 @@ neigh_suppress_arp()
neigh_suppress_arp_common $vid $sip $tip
}
+neigh_suppress_uc_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+ echo "-----------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_uc_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
neigh_suppress_ns_common()
{
local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
neigh_suppress_ns_common $vid $saddr $daddr $maddr
}
+icmpv6_header_get()
+{
+ local csum=$1; shift
+ local tip=$1; shift
+ local type
+ local p
+
+ # Type 135 (Neighbor Solicitation), hex format
+ type="87"
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"$csum:"$( : ICMPv6.checksum
+ )"00:00:00:00:"$( : Reserved
+ )"$tip:"$( : Target Address
+ )
+ echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local full_dip=$1; shift
+ local csum=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast NS, per-port NS suppression - VLAN $vid"
+ echo "---------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+ local csum="ef:79"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+ csum="ef:76"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
neigh_vlan_suppress_arp()
{
local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh
new file mode 100755
index 000000000000..7c594bf6ead0
--- /dev/null
+++ b/tools/testing/selftests/net/test_neigh.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ extern_valid_ipv4
+ extern_valid_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ set -e
+
+ setup_ns ns1 ns2
+
+ ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2"
+ ip -n "$ns1" link set dev veth0 up
+ ip -n "$ns2" link set dev veth1 up
+
+ ip -n "$ns1" address add 192.0.2.1/24 dev veth0
+ ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad
+ ip -n "$ns2" address add 192.0.2.2/24 dev veth1
+ ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad
+
+ ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+
+ sleep 5
+
+ set +e
+}
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+extern_valid_common()
+{
+ local af_str=$1; shift
+ local ip_addr=$1; shift
+ local tbl_name=$1; shift
+ local subnet=$1; shift
+ local mac
+
+ mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]')
+
+ RET=0
+
+ # Check that simple addition works.
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "No \"extern_valid\" flag after addition"
+
+ log_test "$af_str \"extern_valid\" flag: Add entry"
+
+ RET=0
+
+ # Check that an entry cannot be added with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Add with an invalid state"
+
+ RET=0
+
+ # Check that entry cannot be added with both "extern_valid" flag and
+ # "use" / "managed" flag.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag"
+
+ log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag"
+
+ RET=0
+
+ # Check that "extern_valid" flag can be toggled using replace.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Did not manage to set \"extern_valid\" flag with replace"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_fail $? "Did not manage to clear \"extern_valid\" flag with replace"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry"
+
+ RET=0
+
+ # Check that an existing "extern_valid" entry can be marked as
+ # "managed".
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed"
+ check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry"
+
+ log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag"
+
+ RET=0
+
+ # Check that entry cannot be replaced with "extern_valid" flag and an
+ # invalid state.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid"
+ check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state"
+
+ log_test "$af_str \"extern_valid\" flag: Replace with an invalid state"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is flushed when the interface is
+ # put administratively down.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 link set dev veth0 down"
+ run_cmd "ip -n $ns1 link set dev veth0 up"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_fail $? "\"extern_valid\" entry not flushed upon interface down"
+
+ log_test "$af_str \"extern_valid\" flag: Interface down"
+
+ RET=0
+
+ # Check that an "extern_valid" entry is not flushed when the interface
+ # loses its carrier.
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns2 link set dev veth1 down"
+ run_cmd "ip -n $ns2 link set dev veth1 up"
+ run_cmd "sleep 2"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0"
+ check_err $? "\"extern_valid\" entry flushed upon carrier down"
+
+ log_test "$af_str \"extern_valid\" flag: Carrier down"
+
+ RET=0
+
+ # Check that when entry transitions to "reachable" state it maintains
+ # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request /
+ # NS to be sent.
+ local delay_probe
+
+ delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]')
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ run_cmd "sleep $((delay_probe / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\""
+ check_err $? "Entry did not transition to \"reachable\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state"
+
+ log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state"
+
+ RET=0
+
+ # Drop all packets, trigger resolution and check that entry goes back
+ # to "stale" state instead of "failed".
+ local mcast_reprobes
+ local retrans_time
+ local ucast_probes
+ local app_probes
+ local probes
+ local delay
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ run_cmd "tc -n $ns2 qdisc add dev veth1 clsact"
+ run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use"
+ retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]')
+ ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]')
+ app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]')
+ mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]')
+ delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time))
+ run_cmd "sleep $((delay / 1000 + 2))"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\""
+ check_err $? "Entry did not return to \"stale\" state"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state"
+ probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]')
+ if [[ $probes -eq 0 ]]; then
+ check_err 1 "No probes were sent"
+ fi
+
+ log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state"
+
+ run_cmd "tc -n $ns2 qdisc del dev veth1 clsact"
+
+ RET=0
+
+ # Forced garbage collection runs whenever the number of entries is
+ # larger than "thresh3" and deletes stale entries that have not been
+ # updated in the last 5 seconds.
+ #
+ # Check that an "extern_valid" entry survives a forced garbage
+ # collection. Add an entry, wait 5 seconds and add more entries than
+ # "thresh3" so that forced garbage collection will run.
+ #
+ # Note that the garbage collection thresholds are global resources and
+ # that changes in the initial namespace affect all the namespaces.
+ local forced_gc_runs_t0
+ local forced_gc_runs_t1
+ local orig_thresh1
+ local orig_thresh2
+ local orig_thresh3
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]')
+ orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]')
+ run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ run_cmd "sleep 5"
+ forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]')
+ if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then
+ check_err 1 "Forced garbage collection did not run"
+ fi
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived forced garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Forced garbage collection"
+
+ run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1"
+
+ RET=0
+
+ # Periodic garbage collection runs every "base_reachable"/2 seconds and
+ # if the number of entries is larger than "thresh1", then it deletes
+ # stale entries that have not been used in the last "gc_stale" seconds.
+ #
+ # Check that an "extern_valid" entry survives a periodic garbage
+ # collection. Add an "extern_valid" entry, add more than "thresh1"
+ # regular entries, wait "base_reachable" (longer than "gc_stale")
+ # seconds and check that the "extern_valid" entry was not deleted.
+ #
+ # Note that the garbage collection thresholds and "base_reachable" are
+ # global resources and that changes in the initial namespace affect all
+ # the namespaces.
+ local periodic_gc_runs_t0
+ local periodic_gc_runs_t1
+ local orig_base_reachable
+ local orig_gc_stale
+
+ run_cmd "ip -n $ns1 neigh flush dev veth0"
+ orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]')
+ orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]')
+ run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000"
+ orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]')
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000"
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid"
+ run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0"
+ # Wait orig_base_reachable/2 for the new interval to take effect.
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))"
+ for i in {1..20}; do
+ run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0"
+ done
+ periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ run_cmd "sleep 10"
+ periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]')
+ [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]]
+ check_err $? "Periodic garbage collection did not run"
+ run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\""
+ check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection"
+ run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0"
+ check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection"
+
+ log_test "$af_str \"extern_valid\" flag: Periodic garbage collection"
+
+ run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale"
+ run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable"
+}
+
+extern_valid_ipv4()
+{
+ extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2.
+}
+
+extern_valid_ipv6()
+{
+ extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1::
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command jq
+
+if ! ip neigh help 2>&1 | grep -q "extern_valid"; then
+ echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh
new file mode 100755
index 000000000000..d8aa4362879d
--- /dev/null
+++ b/tools/testing/selftests/net/test_so_rcv.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+HOSTS=("127.0.0.1" "::1")
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+declare -A TESTS=(
+ ["SO_RCVPRIORITY"]="-P 2"
+ ["SO_RCVMARK"]="-M 3"
+)
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+for HOST in "${HOSTS[@]}"; do
+ PROTOCOL="IPv4"
+ if [[ "$HOST" == "::1" ]]; then
+ PROTOCOL="IPv6"
+ fi
+ for test_name in "${!TESTS[@]}"; do
+ echo "Running $test_name test, $PROTOCOL"
+ arg=${TESTS[$test_name]}
+
+ ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT &
+ LISTENER_PID=$!
+
+ sleep 0.5
+
+ if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then
+ echo "Sender failed for $test_name, $PROTOCOL"
+ kill "$LISTENER_PID" 2>/dev/null
+ wait "$LISTENER_PID"
+ check_result 1
+ continue
+ fi
+
+ wait "$LISTENER_PID"
+ LISTENER_EXIT_CODE=$?
+
+ if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then
+ echo "Rcv test OK for $test_name, $PROTOCOL"
+ check_result 0
+ else
+ echo "Rcv test FAILED for $test_name, $PROTOCOL"
+ check_result 1
+ fi
+ done
+done
+
+if [ "$FAILED_TESTS" -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit ${KSFT_FAIL}
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit ${KSFT_PASS}
+fi
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
index 2d442cdab11e..8b414d0edada 100755
--- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
+++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh
@@ -1,29 +1,114 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Check FDB default-remote handling across "ip link set".
+ALL_TESTS="
+ test_set_remote
+ test_change_mc_remote
+"
+source lib.sh
check_remotes()
{
local what=$1; shift
local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l)
- echo -ne "expected two remotes after $what\t"
- if [[ $N != 2 ]]; then
- echo "[FAIL]"
- EXIT_STATUS=1
+ ((N == 2))
+ check_err $? "expected 2 remotes after $what, got $N"
+}
+
+# Check FDB default-remote handling across "ip link set".
+test_set_remote()
+{
+ RET=0
+
+ adf_ip_link_add vx up type vxlan id 2000 dstport 4789
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
+ bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
+ check_remotes "fdb append"
+
+ ip link set dev vx type vxlan remote 192.0.2.30
+ check_remotes "link set"
+
+ log_test 'FDB default-remote handling across "ip link set"'
+}
+
+fmt_remote()
+{
+ local addr=$1; shift
+
+ if [[ $addr == 224.* ]]; then
+ echo "group $addr"
else
- echo "[ OK ]"
+ echo "remote $addr"
fi
}
-ip link add name vx up type vxlan id 2000 dstport 4789
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent
-bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent
-check_remotes "fdb append"
+change_remote()
+{
+ local remote=$1; shift
+
+ ip link set dev vx type vxlan $(fmt_remote $remote) dev v1
+}
+
+check_membership()
+{
+ local check_vec=("$@")
+
+ local memberships
+ memberships=$(
+ netstat -n --groups |
+ sed -n '/^v1\b/p' |
+ grep -o '[^ ]*$'
+ )
+ check_err $? "Couldn't obtain group memberships"
+
+ local item
+ for item in "${check_vec[@]}"; do
+ eval "local $item"
+ echo "$memberships" | grep -q "\b$group\b"
+ check_err_fail $fail $? "$group is_ex reported in IGMP query response"
+ done
+}
+
+test_change_mc_remote()
+{
+ check_command netstat || return
+
+ adf_ip_link_add v1 up type veth peer name v2
+ adf_ip_link_set_up v2
+
+ RET=0
+
+ adf_ip_link_add vx up type vxlan dstport 4789 \
+ local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000
+
+ check_membership "group=224.1.1.1 fail=0" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after VXLAN creation"
+
+ RET=0
+
+ change_remote 224.1.1.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=0" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->MC"
+
+ RET=0
+
+ change_remote 192.0.2.2
+ check_membership "group=224.1.1.1 fail=1" \
+ "group=224.1.1.2 fail=1" \
+ "group=224.1.1.3 fail=1"
+
+ log_test "MC group report after changing VXLAN remote MC->UC"
+}
+
+trap defer_scopes_cleanup EXIT
-ip link set dev vx type vxlan remote 192.0.2.30
-check_remotes "link set"
+tests_run
-ip link del dev vx
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 000000000000..20f3369f776b
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,223 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+ basic_tx_ipv4
+ basic_tx_ipv6
+ learning
+ proxy_ipv4
+ proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ stderr=
+ fi
+
+ out=$(eval "$cmd" "$stderr")
+ rc=$?
+ if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+################################################################################
+# Cleanup
+
+exit_cleanup_all()
+{
+ cleanup_all_ns
+ exit "${EXIT_STATUS}"
+}
+
+################################################################################
+# Tests
+
+nh_stats_get()
+{
+ ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]"
+}
+
+tc_stats_get()
+{
+ tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1"
+}
+
+basic_tx_common()
+{
+ local af_str=$1; shift
+ local proto=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+
+ RET=0
+
+ # Test basic Tx functionality. Check that stats are incremented on
+ # both the FDB nexthop group and the egress device.
+
+ run_cmd "ip -n $ns1 link add name dummy1 up type dummy"
+ run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1"
+ run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact"
+ run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass"
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789"
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null
+ check_err $? "FDB nexthop group stats did not increase"
+
+ busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" tc_stats_get > /dev/null
+ check_err $? "tc filter stats did not increase"
+
+ log_test "VXLAN FDB nexthop: $af_str basic Tx"
+}
+
+basic_tx_ipv4()
+{
+ basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2
+}
+
+basic_tx_ipv6()
+{
+ basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2
+}
+
+learning()
+{
+ RET=0
+
+ # When learning is enabled on the VXLAN device, an incoming packet
+ # might try to refresh an FDB entry that points to an FDB nexthop group
+ # instead of an ordinary remote destination. Check that the kernel does
+ # not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo"
+ run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass"
+ run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020"
+ run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q"
+
+ log_test "VXLAN FDB nexthop: learning"
+}
+
+proxy_common()
+{
+ local af_str=$1; shift
+ local local_addr=$1; shift
+ local plen=$1; shift
+ local remote_addr=$1; shift
+ local neigh_addr=$1; shift
+ local ping_cmd=$1; shift
+
+ RET=0
+
+ # When the "proxy" option is enabled on the VXLAN device, the device
+ # will suppress ARP requests and IPv6 Neighbor Solicitation messages if
+ # it is able to reply on behalf of the remote host. That is, if a
+ # matching and valid neighbor entry is configured on the VXLAN device
+ # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The
+ # FDB entry for the neighbor's MAC address might point to an FDB
+ # nexthop group instead of an ordinary remote destination. Check that
+ # the kernel does not crash in this situation.
+
+ run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo"
+
+ run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb"
+ run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb"
+
+ run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy"
+
+ run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0"
+
+ run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10"
+
+ run_cmd "ip netns exec $ns1 $ping_cmd"
+
+ log_test "VXLAN FDB nexthop: $af_str proxy"
+}
+
+proxy_ipv4()
+{
+ proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \
+ "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3"
+}
+
+proxy_ipv6()
+{
+ proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \
+ "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+while getopts ":t:pvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=$((VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+require_command mausezahn
+require_command arping
+require_command ndisc6
+require_command jq
+
+if ! ip nexthop help 2>&1 | grep -q "stats"; then
+ echo "SKIP: iproute2 ip too old, missing nexthop stats support"
+ exit "$ksft_skip"
+fi
+
+trap exit_cleanup_all EXIT
+
+for t in $TESTS
+do
+ setup_ns ns1; $t; cleanup_all_ns;
+done
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 6127a78ee988..8deacc565afa 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -146,18 +146,17 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
- sleep 1
- ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
new file mode 100644
index 000000000000..eb3cac5e583c
--- /dev/null
+++ b/tools/testing/selftests/net/tfo.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static struct sockaddr_in6 cfg_addr;
+static char *cfg_outfile;
+
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+ int ret;
+
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+
+ ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+ if (ret != 1) {
+ /* fallback to plain IPv4 */
+ ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+ if (ret != 1)
+ return -1;
+
+ /* add ::ffff prefix */
+ sin6->sin6_addr.s6_addr32[0] = 0;
+ sin6->sin6_addr.s6_addr32[1] = 0;
+ sin6->sin6_addr.s6_addr16[4] = 0;
+ sin6->sin6_addr.s6_addr16[5] = 0xffff;
+ }
+
+ return 0;
+}
+
+static void run_server(void)
+{
+ unsigned long qlen = 32;
+ int fd, opt, connfd;
+ socklen_t len;
+ char buf[64];
+ FILE *outfile;
+
+ outfile = fopen(cfg_outfile, "w");
+ if (!outfile)
+ error(1, errno, "fopen() outfile");
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ opt = 1;
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
+ error(1, errno, "setsockopt(SO_REUSEADDR)");
+
+ if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
+ error(1, errno, "setsockopt(TCP_FASTOPEN)");
+
+ if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+ error(1, errno, "bind()");
+
+ if (listen(fd, 5) < 0)
+ error(1, errno, "listen()");
+
+ len = sizeof(cfg_addr);
+ connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len);
+ if (connfd < 0)
+ error(1, errno, "accept()");
+
+ len = sizeof(opt);
+ if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
+ error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
+
+ read(connfd, buf, 64);
+ fprintf(outfile, "%d\n", opt);
+
+ fclose(outfile);
+ close(connfd);
+ close(fd);
+}
+
+static void run_client(void)
+{
+ int fd;
+ char *msg = "Hello, world!";
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket()");
+
+ sendto(fd, msg, strlen(msg), MSG_FASTOPEN, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr));
+
+ close(fd);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+ char *addr = NULL;
+ int ret;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+
+ while ((c = getopt(argc, argv, "sch:p:o:")) != -1) {
+ switch (c) {
+ case 's':
+ if (cfg_client)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_server = 1;
+ break;
+ case 'c':
+ if (cfg_server)
+ error(1, 0, "Pass one of -s or -c");
+ cfg_client = 1;
+ break;
+ case 'h':
+ addr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 'o':
+ cfg_outfile = strdup(optarg);
+ if (!cfg_outfile)
+ error(1, 0, "outfile invalid");
+ break;
+ }
+ }
+
+ if (cfg_server && addr)
+ error(1, 0, "Server cannot have -h specified");
+
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ addr6->sin6_addr = in6addr_any;
+ if (addr) {
+ ret = parse_address(addr, cfg_port, addr6);
+ if (ret)
+ error(1, 0, "Client address parse error: %s", addr);
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_opts(argc, argv);
+
+ if (cfg_server)
+ run_server();
+ else if (cfg_client)
+ run_client();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
new file mode 100755
index 000000000000..a4550511830a
--- /dev/null
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -0,0 +1,112 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_SV_SYS/net -exec basename {} \;)
+ NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_CL_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_SV_NAME netns nssv
+ ip link set $NSIM_CL_NAME netns nscl
+
+ ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+ ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+ ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+ # Enable passive TFO
+ ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null
+
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_SV_FD=$((256 + RANDOM % 256))
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+NSIM_CL_FD=$((256 + RANDOM % 256))
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+ $NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+out_file=$(mktemp)
+
+timeout -k 1s 30s ip netns exec nssv ./tfo \
+ -s \
+ -p ${SERVER_PORT} \
+ -o ${out_file}&
+
+wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+
+wait
+
+res=$(cat $out_file)
+rm $out_file
+
+if [ "$res" = "0" ]; then
+ echo "got invalid NAPI ID from passive TFO socket"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 1a706d03bb6b..a3ef4b57eb5f 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -21,7 +21,7 @@
#include <sys/socket.h>
#include <sys/stat.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
@@ -44,9 +44,11 @@ struct tls_crypto_info_keys {
};
static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
- struct tls_crypto_info_keys *tls12)
+ struct tls_crypto_info_keys *tls12,
+ char key_generation)
{
- memset(tls12, 0, sizeof(*tls12));
+ memset(tls12, key_generation, sizeof(*tls12));
+ memset(tls12, 0, sizeof(struct tls_crypto_info));
switch (cipher_type) {
case TLS_CIPHER_CHACHA20_POLY1305:
@@ -179,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type,
return sendmsg(fd, &msg, flags);
}
-static int tls_recv_cmsg(struct __test_metadata *_metadata,
- int fd, unsigned char record_type,
- void *data, size_t len, int flags)
+static int __tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char *ctype,
+ void *data, size_t len, int flags)
{
char cbuf[CMSG_SPACE(sizeof(char))];
struct cmsghdr *cmsg;
- unsigned char ctype;
struct msghdr msg;
struct iovec vec;
int n;
@@ -204,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata,
EXPECT_NE(cmsg, NULL);
EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- ctype = *((unsigned char *)CMSG_DATA(cmsg));
+ if (ctype)
+ *ctype = *((unsigned char *)CMSG_DATA(cmsg));
+
+ return n;
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ unsigned char ctype;
+ int n;
+
+ n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags);
EXPECT_EQ(ctype, record_type);
return n;
@@ -275,7 +289,7 @@ TEST_F(tls_basic, recseq_wrap)
if (self->notls)
SKIP(return, "no TLS support");
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq));
ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
@@ -391,7 +405,7 @@ FIXTURE_SETUP(tls)
SKIP(return, "Unsupported cipher in FIPS mode");
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
- &tls12);
+ &tls12, 0);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
@@ -425,6 +439,8 @@ TEST_F(tls, sendfile)
EXPECT_GE(filefd, 0);
fstat(filefd, &st);
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
+
+ close(filefd);
}
TEST_F(tls, send_then_sendfile)
@@ -446,6 +462,9 @@ TEST_F(tls, send_then_sendfile)
EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0);
EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size);
+
+ free(buf);
+ close(filefd);
}
static void chunked_sendfile(struct __test_metadata *_metadata,
@@ -545,6 +564,40 @@ TEST_F(tls, msg_more)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls, cmsg_msg_more)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+
+ /* we don't allow MSG_MORE with non-DATA records */
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len,
+ MSG_MORE), -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+TEST_F(tls, msg_more_then_cmsg)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10 * 2];
+ int ret;
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
+
+ ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0);
+ EXPECT_EQ(ret, send_len);
+
+ /* initial DATA record didn't get merged with the non-DATA record */
+ EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+}
+
TEST_F(tls, msg_more_unsent)
{
char const *test_str = "test_read";
@@ -893,6 +946,37 @@ TEST_F(tls, peek_and_splice)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+#define MAX_FRAGS 48
+TEST_F(tls, splice_short)
+{
+ struct iovec sendchar_iov;
+ char read_buf[0x10000];
+ char sendbuf[0x100];
+ char sendchar = 'S';
+ int pipefds[2];
+ int i;
+
+ sendchar_iov.iov_base = &sendchar;
+ sendchar_iov.iov_len = 1;
+
+ memset(sendbuf, 's', sizeof(sendbuf));
+
+ ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0);
+ ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0);
+
+ for (i = 0; i < MAX_FRAGS; i++)
+ ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0);
+
+ ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf));
+
+ EXPECT_EQ(splice(pipefds[0], NULL, self->fd, NULL, MAX_FRAGS + 0x1000, 0),
+ MAX_FRAGS + sizeof(sendbuf));
+ EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf));
+ EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EAGAIN);
+}
+#undef MAX_FRAGS
+
TEST_F(tls, recvmsg_single)
{
char const *test_str = "test_recvmsg_single";
@@ -1175,7 +1259,7 @@ TEST_F(tls, bidir)
struct tls_crypto_info_keys tls12;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
- &tls12);
+ &tls12, 0);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
tls12.len);
@@ -1614,7 +1698,7 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
/* get the full crypto_info */
- tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0);
len = expect.len;
memrnd(&get, sizeof(get));
EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
@@ -1668,6 +1752,778 @@ TEST_F(tls, recv_efault)
EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
}
+#define TLS_RECORD_TYPE_HANDSHAKE 0x16
+/* key_update, length 1, update_not_requested */
+static const char key_update_msg[] = "\x18\x00\x00\x01\x00";
+static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd)
+{
+ size_t len = sizeof(key_update_msg);
+
+ EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE,
+ (char *)key_update_msg, len, 0),
+ len);
+}
+
+static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags)
+{
+ char buf[100];
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags),
+ sizeof(key_update_msg));
+ EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0);
+}
+
+/* set the key to 0 then 1 for RX, immediately to 1 for TX */
+TEST_F(tls_basic, rekey_rx)
+{
+ struct tls_crypto_info_keys tls12_0, tls12_1;
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_0, 0);
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_1, 1);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+ EXPECT_EQ(ret, 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+/* set the key to 0 then 1 for TX, immediately to 1 for RX */
+TEST_F(tls_basic, rekey_tx)
+{
+ struct tls_crypto_info_keys tls12_0, tls12_1;
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_0, 0);
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_1, 1);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+ EXPECT_EQ(ret, 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls_basic, disconnect)
+{
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ struct tls_crypto_info_keys key;
+ struct sockaddr_in addr;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &key, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ /* Pre-queue the data so that setsockopt parses it but doesn't
+ * dequeue it from the TCP socket. recvmsg would dequeue.
+ */
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len);
+ ASSERT_EQ(ret, 0);
+
+ addr.sin_family = AF_UNSPEC;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+ ret = connect(self->cfd, &addr, sizeof(addr));
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno, EOPNOTSUPP);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+}
+
+TEST_F(tls, rekey)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ char const *test_str_2 = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* initial send/recv */
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* send after rekey */
+ send_len = strlen(test_str_2) + 1;
+ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* recv blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* recv non-blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ /* recv after rekey */
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_fail)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ char const *test_str_2 = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ /* initial send/recv */
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+
+ if (variant->tls_version != TLS_1_3_VERSION) {
+ /* just check that rekey is not supported and return */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EBUSY);
+ return;
+ }
+
+ /* successful update */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* invalid update: change of version */
+ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* invalid update (RX socket): change of version */
+ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* invalid update: change of cipher */
+ if (variant->cipher_type == TLS_CIPHER_AES_GCM_256)
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1);
+ else
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* send after rekey, the invalid updates shouldn't have an effect */
+ send_len = strlen(test_str_2) + 1;
+ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* recv blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* recv non-blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ /* recv after rekey */
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+
+ /* peek KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, splice_rekey)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN / 2;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ struct tls_crypto_info_keys tls12;
+ int p[2];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ memrnd(mem_send, sizeof(mem_send));
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+
+ /* can't splice the KeyUpdate */
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* peek KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* can't splice before updating the key */
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek_splice)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ ASSERT_GE(pipe(p), 0);
+
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0);
+}
+
+TEST_F(tls, rekey_getsockopt)
+{
+ struct tls_crypto_info_keys tls12;
+ struct tls_crypto_info_keys tls12_get;
+ socklen_t len;
+
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, rekey_poll_pending)
+{
+ char const *test_str = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ struct pollfd pfd = { };
+ int send_len;
+ int ret;
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* send immediately after rekey */
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ /* key hasn't been updated, expect cfd to be non-readable */
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int pid2, status;
+
+ /* wait before installing the new key */
+ sleep(1);
+
+ /* update RX key while poll() is sleeping */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+
+ exit(!__test_passed(_metadata));
+ }
+}
+
+TEST_F(tls, rekey_poll_delay)
+{
+ char const *test_str = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ struct pollfd pfd = { };
+ int send_len;
+ int ret;
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int pid2, status;
+
+ /* wait before installing the new key */
+ sleep(1);
+
+ /* update RX key while poll() is sleeping */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ sleep(1);
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+ exit(!__test_passed(_metadata));
+ }
+}
+
+struct raw_rec {
+ unsigned int plain_len;
+ unsigned char plain_data[100];
+ unsigned int cipher_len;
+ unsigned char cipher_data[128];
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */
+static const struct raw_rec id0_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33,
+ 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf,
+ 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd,
+ 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */
+static const struct raw_rec id0_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b,
+ 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae,
+ 0x88, 0xe1, 0xd2, 0x08, 0x4f,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */
+static const struct raw_rec id0_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90,
+ 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03,
+ 0x68, 0x80, 0xd3, 0xd8, 0xcc,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */
+static const struct raw_rec id1_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c,
+ 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3,
+ 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff,
+ 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */
+static const struct raw_rec id1_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe,
+ 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6,
+ 0xb4, 0x7e, 0xef, 0x40, 0x2b,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */
+static const struct raw_rec id1_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86,
+ 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc,
+ 0xc9, 0xbf, 0xfe, 0x5b, 0xb1,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_ctrl_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+ 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36,
+ 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */
+static const struct raw_rec id2_data_l11 = {
+ .plain_len = 11,
+ .plain_data = {
+ 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f,
+ 0x72, 0x6c, 0x64,
+ },
+ .cipher_len = 40,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19,
+ 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87,
+ 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b,
+ 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11,
+ },
+};
+
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */
+static const struct raw_rec id2_ctrl_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e,
+ 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9,
+ 0x06, 0xdb, 0x79, 0xe5, 0x5d,
+ },
+};
+
+/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */
+static const struct raw_rec id2_data_l0 = {
+ .plain_len = 0,
+ .plain_data = {
+ },
+ .cipher_len = 29,
+ .cipher_data = {
+ 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26,
+ 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83,
+ 0x30, 0x48, 0x69, 0x1a, 0x47,
+ },
+};
+
+FIXTURE(zero_len)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_VARIANT(zero_len)
+{
+ const struct raw_rec *recs[4];
+ ssize_t recv_ret[4];
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_data_data)
+{
+ .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, },
+ .recv_ret = { 33, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data)
+{
+ .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, },
+ .recv_ret = { 11, 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, },
+ .recv_ret = { -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, },
+ .recv_ret = { 0, 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, },
+ .recv_ret = { 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl)
+{
+ .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, },
+ .recv_ret = { 0, 0, 0, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data)
+{
+ .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, },
+ .recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_VARIANT_ADD(zero_len, data_0data_0data)
+{
+ .recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, },
+ .recv_ret = { 11, -EAGAIN, },
+};
+
+FIXTURE_SETUP(zero_len)
+{
+ struct tls_crypto_info_keys tls12;
+ int ret;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+ if (self->notls)
+ return;
+
+ /* Don't install keys on fd, we'll send raw records */
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(zero_len)
+{
+ close(self->fd);
+ close(self->cfd);
+}
+
+TEST_F(zero_len, test)
+{
+ const struct raw_rec *const *rec;
+ unsigned char buf[128];
+ int rec_off;
+ int i;
+
+ for (i = 0; i < 4 && variant->recs[i]; i++)
+ EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data,
+ variant->recs[i]->cipher_len, 0),
+ variant->recs[i]->cipher_len);
+
+ rec = &variant->recs[0];
+ rec_off = 0;
+ for (i = 0; i < 4; i++) {
+ int j, ret;
+
+ ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1;
+ EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL,
+ buf, sizeof(buf), MSG_DONTWAIT), ret);
+ if (ret == -1)
+ EXPECT_EQ(errno, -variant->recv_ret[i]);
+ if (variant->recv_ret[i] == -EAGAIN)
+ break;
+
+ for (j = 0; j < ret; j++) {
+ while (rec_off == (*rec)->plain_len) {
+ rec++;
+ rec_off = 0;
+ }
+ EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]);
+ rec_off++;
+ }
+ }
+};
+
FIXTURE(tls_err)
{
int fd, cfd;
@@ -1696,7 +2552,7 @@ FIXTURE_SETUP(tls_err)
int ret;
tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
- &tls12);
+ &tls12, 0);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
@@ -1984,6 +2840,163 @@ TEST_F(tls_err, poll_partial_rec_async)
}
}
+/* Use OOB+large send to trigger copy mode due to memory pressure.
+ * OOB causes a short read.
+ */
+TEST_F(tls_err, oob_pressure)
+{
+ char buf[1<<16];
+ int i;
+
+ memrnd(buf, sizeof(buf));
+
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+ EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+ for (i = 0; i < 64; i++)
+ EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5);
+}
+
+/*
+ * Parse a stream of TLS records and ensure that each record respects
+ * the specified @max_payload_len.
+ */
+static size_t parse_tls_records(struct __test_metadata *_metadata,
+ const __u8 *rx_buf, int rx_len, int overhead,
+ __u16 max_payload_len)
+{
+ const __u8 *rec = rx_buf;
+ size_t total_plaintext_rx = 0;
+ const __u8 rec_header_len = 5;
+
+ while (rec < rx_buf + rx_len) {
+ __u16 record_payload_len;
+ __u16 plaintext_len;
+
+ /* Sanity check that it's a TLS header for application data */
+ ASSERT_EQ(rec[0], 23);
+ ASSERT_EQ(rec[1], 0x3);
+ ASSERT_EQ(rec[2], 0x3);
+
+ memcpy(&record_payload_len, rec + 3, 2);
+ record_payload_len = ntohs(record_payload_len);
+ ASSERT_GE(record_payload_len, overhead);
+
+ plaintext_len = record_payload_len - overhead;
+ total_plaintext_rx += plaintext_len;
+
+ /* Plaintext must not exceed the specified limit */
+ ASSERT_LE(plaintext_len, max_payload_len);
+ rec += rec_header_len + record_payload_len;
+ }
+
+ return total_plaintext_rx;
+}
+
+TEST(tls_12_tx_max_payload_len)
+{
+ struct tls_crypto_info_keys tls12;
+ int cfd, ret, fd, overhead;
+ size_t total_plaintext_rx = 0;
+ __u8 tx[1024], rx[2000];
+ __u16 limit = 128;
+ __u16 opt = 0;
+ unsigned int optlen = sizeof(opt);
+ bool notls;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ /* Don't install keys on fd, we'll parse raw records */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+ sizeof(limit));
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(limit, opt);
+ EXPECT_EQ(optlen, sizeof(limit));
+
+ memset(tx, 0, sizeof(tx));
+ ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx));
+ close(cfd);
+
+ ret = recv(fd, rx, sizeof(rx), 0);
+
+ /*
+ * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+ * need it to walk the record stream
+ */
+ overhead = 16 + 8;
+ total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+ limit);
+
+ ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+ close(fd);
+}
+
+TEST(tls_12_tx_max_payload_len_open_rec)
+{
+ struct tls_crypto_info_keys tls12;
+ int cfd, ret, fd, overhead;
+ size_t total_plaintext_rx = 0;
+ __u8 tx[1024], rx[2000];
+ __u16 tx_partial = 256;
+ __u16 og_limit = 512, limit = 128;
+ bool notls;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128,
+ &tls12, 0);
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ /* Don't install keys on fd, we'll parse raw records */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit,
+ sizeof(og_limit));
+ ASSERT_EQ(ret, 0);
+
+ memset(tx, 0, sizeof(tx));
+ ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial);
+
+ /*
+ * Changing the payload limit with a pending open record should
+ * not be allowed.
+ */
+ ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit,
+ sizeof(limit));
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EBUSY);
+
+ ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR),
+ sizeof(tx) - tx_partial);
+ close(cfd);
+
+ ret = recv(fd, rx, sizeof(rx), 0);
+
+ /*
+ * 16B tag + 8B IV -- record header (5B) is not counted but we'll
+ * need it to walk the record stream
+ */
+ overhead = 16 + 8;
+ total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead,
+ og_limit);
+ ASSERT_EQ(total_plaintext_rx, sizeof(tx));
+ close(fd);
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -2118,7 +3131,7 @@ TEST(tls_v6ops) {
int sfd, ret, fd;
socklen_t len, len2;
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
addr.sin6_family = AF_INET6;
addr.sin6_addr = in6addr_any;
@@ -2177,7 +3190,7 @@ TEST(prequeue) {
len = sizeof(addr);
memrnd(buf, sizeof(buf));
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
@@ -2212,6 +3225,67 @@ TEST(prequeue) {
close(cfd);
}
+TEST(data_steal) {
+ struct tls_crypto_info_keys tls;
+ char buf[20000], buf2[20000];
+ struct sockaddr_in addr;
+ int sfd, cfd, ret, fd;
+ int pid, status;
+ socklen_t len;
+
+ len = sizeof(addr);
+ memrnd(buf, sizeof(buf));
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0);
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+ ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0);
+ ASSERT_EQ(listen(sfd, 10), 0);
+ ASSERT_EQ(getsockname(sfd, &addr, &len), 0);
+ ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0);
+ ASSERT_GE(cfd = accept(sfd, &addr, &len), 0);
+ close(sfd);
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (ret) {
+ ASSERT_EQ(errno, ENOENT);
+ SKIP(return, "no TLS support");
+ }
+ ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
+
+ /* Spawn a child and get it into the read wait path of the underlying
+ * TCP socket.
+ */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (!pid) {
+ EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL),
+ sizeof(buf) / 2);
+ exit(!__test_passed(_metadata));
+ }
+
+ usleep(10000);
+ ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0);
+ ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0);
+
+ EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf));
+ EXPECT_EQ(wait(&status), pid);
+ EXPECT_EQ(status, 0);
+ EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1);
+ /* Don't check errno, the error will be different depending
+ * on what random bytes TLS interpreted as the record length.
+ */
+
+ close(fd);
+ close(cfd);
+}
+
static void __attribute__((constructor)) fips_check(void) {
int res;
FILE *f;
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
deleted file mode 100644
index 9ba03164d73a..000000000000
--- a/tools/testing/selftests/net/toeplitz.c
+++ /dev/null
@@ -1,589 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Toeplitz test
- *
- * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
- * 2. Compute the rx_hash in software based on the packet contents
- * 3. Compare the two
- *
- * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
- *
- * If '-C $rx_irq_cpu_list' is given, also
- *
- * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
- * 5. Compute the rxqueue that RSS would select based on this rx_hash
- * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
- * 7. Compare the cpus from 4 and 6
- *
- * Else if '-r $rps_bitmap' is given, also
- *
- * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
- * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
- * 6. Compare the cpus from 4 and 5
- */
-
-#define _GNU_SOURCE
-
-#include <arpa/inet.h>
-#include <errno.h>
-#include <error.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <linux/filter.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <net/if.h>
-#include <netdb.h>
-#include <netinet/ip.h>
-#include <netinet/ip6.h>
-#include <netinet/tcp.h>
-#include <netinet/udp.h>
-#include <poll.h>
-#include <stdbool.h>
-#include <stddef.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#include "../kselftest.h"
-
-#define TOEPLITZ_KEY_MIN_LEN 40
-#define TOEPLITZ_KEY_MAX_LEN 60
-
-#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
-#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
-#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
-
-#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
-
-#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
-
-#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
-
-/* configuration options (cmdline arguments) */
-static uint16_t cfg_dport = 8000;
-static int cfg_family = AF_INET6;
-static char *cfg_ifname = "eth0";
-static int cfg_num_queues;
-static int cfg_num_rps_cpus;
-static bool cfg_sink;
-static int cfg_type = SOCK_STREAM;
-static int cfg_timeout_msec = 1000;
-static bool cfg_verbose;
-
-/* global vars */
-static int num_cpus;
-static int ring_block_nr;
-static int ring_block_sz;
-
-/* stats */
-static int frames_received;
-static int frames_nohash;
-static int frames_error;
-
-#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
-
-/* tpacket ring */
-struct ring_state {
- int fd;
- char *mmap;
- int idx;
- int cpu;
-};
-
-static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
-static int rps_silo_to_cpu[RPS_MAX_CPUS];
-static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
-static struct ring_state rings[RSS_MAX_CPUS];
-
-static inline uint32_t toeplitz(const unsigned char *four_tuple,
- const unsigned char *key)
-{
- int i, bit, ret = 0;
- uint32_t key32;
-
- key32 = ntohl(*((uint32_t *)key));
- key += 4;
-
- for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
- for (bit = 7; bit >= 0; bit--) {
- if (four_tuple[i] & (1 << bit))
- ret ^= key32;
-
- key32 <<= 1;
- key32 |= !!(key[0] & (1 << bit));
- }
- key++;
- }
-
- return ret;
-}
-
-/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
-static void verify_rss(uint32_t rx_hash, int cpu)
-{
- int queue = rx_hash % cfg_num_queues;
-
- log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
- if (rx_irq_cpus[queue] != cpu) {
- log_verbose(". error: rss cpu mismatch (%d)", cpu);
- frames_error++;
- }
-}
-
-static void verify_rps(uint64_t rx_hash, int cpu)
-{
- int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
-
- log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
- if (rps_silo_to_cpu[silo] != cpu) {
- log_verbose(". error: rps cpu mismatch (%d)", cpu);
- frames_error++;
- }
-}
-
-static void log_rxhash(int cpu, uint32_t rx_hash,
- const char *addrs, int addr_len)
-{
- char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
- uint16_t *ports;
-
- if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
- !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
- error(1, 0, "address parse error");
-
- ports = (void *)addrs + (addr_len * 2);
- log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
- cpu, rx_hash, saddr, daddr,
- ntohs(ports[0]), ntohs(ports[1]));
-}
-
-/* Compare computed rxhash with rxhash received from tpacket_v3 */
-static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
-{
- unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
- uint32_t rx_hash_sw;
- const char *addrs;
- int addr_len;
-
- if (cfg_family == AF_INET) {
- addr_len = sizeof(struct in_addr);
- addrs = pkt + offsetof(struct iphdr, saddr);
- } else {
- addr_len = sizeof(struct in6_addr);
- addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
- }
-
- memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
- rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
-
- if (cfg_verbose)
- log_rxhash(cpu, rx_hash, addrs, addr_len);
-
- if (rx_hash != rx_hash_sw) {
- log_verbose(" != expected 0x%x\n", rx_hash_sw);
- frames_error++;
- return;
- }
-
- log_verbose(" OK");
- if (cfg_num_queues)
- verify_rss(rx_hash, cpu);
- else if (cfg_num_rps_cpus)
- verify_rps(rx_hash, cpu);
- log_verbose("\n");
-}
-
-static char *recv_frame(const struct ring_state *ring, char *frame)
-{
- struct tpacket3_hdr *hdr = (void *)frame;
-
- if (hdr->hv1.tp_rxhash)
- verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
- ring->cpu);
- else
- frames_nohash++;
-
- return frame + hdr->tp_next_offset;
-}
-
-/* A single TPACKET_V3 block can hold multiple frames */
-static bool recv_block(struct ring_state *ring)
-{
- struct tpacket_block_desc *block;
- char *frame;
- int i;
-
- block = (void *)(ring->mmap + ring->idx * ring_block_sz);
- if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
- return false;
-
- frame = (char *)block;
- frame += block->hdr.bh1.offset_to_first_pkt;
-
- for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
- frame = recv_frame(ring, frame);
- frames_received++;
- }
-
- block->hdr.bh1.block_status = TP_STATUS_KERNEL;
- ring->idx = (ring->idx + 1) % ring_block_nr;
-
- return true;
-}
-
-/* simple test: sleep once unconditionally and then process all rings */
-static void process_rings(void)
-{
- int i;
-
- usleep(1000 * cfg_timeout_msec);
-
- for (i = 0; i < num_cpus; i++)
- do {} while (recv_block(&rings[i]));
-
- fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
- frames_received - frames_nohash - frames_error,
- frames_nohash, frames_error);
-}
-
-static char *setup_ring(int fd)
-{
- struct tpacket_req3 req3 = {0};
- void *ring;
-
- req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
- req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
-
- req3.tp_frame_size = 2048;
- req3.tp_frame_nr = 1 << 10;
- req3.tp_block_nr = 16;
-
- req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
- req3.tp_block_size /= req3.tp_block_nr;
-
- if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
- error(1, errno, "setsockopt PACKET_RX_RING");
-
- ring_block_sz = req3.tp_block_size;
- ring_block_nr = req3.tp_block_nr;
-
- ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
- if (ring == MAP_FAILED)
- error(1, 0, "mmap failed");
-
- return ring;
-}
-
-static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
- BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
- BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
- BPF_STMT(BPF_RET + BPF_K, 0),
- BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
- };
- struct sock_fprog prog = {};
-
- prog.filter = filter;
- prog.len = ARRAY_SIZE(filter);
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
- error(1, errno, "setsockopt filter");
-}
-
-/* filter on transport protocol and destination port */
-static void set_filter(int fd)
-{
- const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
- uint8_t proto;
-
- proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
- if (cfg_family == AF_INET)
- __set_filter(fd, offsetof(struct iphdr, protocol), proto,
- sizeof(struct iphdr) + off_dport);
- else
- __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
- sizeof(struct ip6_hdr) + off_dport);
-}
-
-/* drop everything: used temporarily during setup */
-static void set_filter_null(int fd)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_RET + BPF_K, 0),
- };
- struct sock_fprog prog = {};
-
- prog.filter = filter;
- prog.len = ARRAY_SIZE(filter);
- if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
- error(1, errno, "setsockopt filter");
-}
-
-static int create_ring(char **ring)
-{
- struct fanout_args args = {
- .id = 1,
- .type_flags = PACKET_FANOUT_CPU,
- .max_num_members = RSS_MAX_CPUS
- };
- struct sockaddr_ll ll = { 0 };
- int fd, val;
-
- fd = socket(PF_PACKET, SOCK_DGRAM, 0);
- if (fd == -1)
- error(1, errno, "socket creation failed");
-
- val = TPACKET_V3;
- if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
- error(1, errno, "setsockopt PACKET_VERSION");
- *ring = setup_ring(fd);
-
- /* block packets until all rings are added to the fanout group:
- * else packets can arrive during setup and get misclassified
- */
- set_filter_null(fd);
-
- ll.sll_family = AF_PACKET;
- ll.sll_ifindex = if_nametoindex(cfg_ifname);
- ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
- htons(ETH_P_IPV6);
- if (bind(fd, (void *)&ll, sizeof(ll)))
- error(1, errno, "bind");
-
- /* must come after bind: verifies all programs in group match */
- if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
- /* on failure, retry using old API if that is sufficient:
- * it has a hard limit of 256 sockets, so only try if
- * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
- * in this API, the third argument is left implicit.
- */
- if (cfg_num_queues || num_cpus > 256 ||
- setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
- &args, sizeof(uint32_t)))
- error(1, errno, "setsockopt PACKET_FANOUT cpu");
- }
-
- return fd;
-}
-
-/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
-static int setup_sink(void)
-{
- int fd, val;
-
- fd = socket(cfg_family, cfg_type, 0);
- if (fd == -1)
- error(1, errno, "socket %d.%d", cfg_family, cfg_type);
-
- val = 1 << 20;
- if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
- error(1, errno, "setsockopt rcvbuf");
-
- return fd;
-}
-
-static void setup_rings(void)
-{
- int i;
-
- for (i = 0; i < num_cpus; i++) {
- rings[i].cpu = i;
- rings[i].fd = create_ring(&rings[i].mmap);
- }
-
- /* accept packets once all rings in the fanout group are up */
- for (i = 0; i < num_cpus; i++)
- set_filter(rings[i].fd);
-}
-
-static void cleanup_rings(void)
-{
- int i;
-
- for (i = 0; i < num_cpus; i++) {
- if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
- error(1, errno, "munmap");
- if (close(rings[i].fd))
- error(1, errno, "close");
- }
-}
-
-static void parse_cpulist(const char *arg)
-{
- do {
- rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
-
- arg = strchr(arg, ',');
- if (!arg)
- break;
- arg++; // skip ','
- } while (1);
-}
-
-static void show_cpulist(void)
-{
- int i;
-
- for (i = 0; i < cfg_num_queues; i++)
- fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
-}
-
-static void show_silos(void)
-{
- int i;
-
- for (i = 0; i < cfg_num_rps_cpus; i++)
- fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
-}
-
-static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
-{
- int i, ret, off;
-
- if (slen < TOEPLITZ_STR_MIN_LEN ||
- slen > TOEPLITZ_STR_MAX_LEN + 1)
- error(1, 0, "invalid toeplitz key");
-
- for (i = 0, off = 0; off < slen; i++, off += 3) {
- ret = sscanf(str + off, "%hhx", &key[i]);
- if (ret != 1)
- error(1, 0, "key parse error at %d off %d len %d",
- i, off, slen);
- }
-}
-
-static void parse_rps_bitmap(const char *arg)
-{
- unsigned long bitmap;
- int i;
-
- bitmap = strtoul(arg, NULL, 0);
-
- if (bitmap & ~(RPS_MAX_CPUS - 1))
- error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
- bitmap, RPS_MAX_CPUS - 1);
-
- for (i = 0; i < RPS_MAX_CPUS; i++)
- if (bitmap & 1UL << i)
- rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
-}
-
-static void parse_opts(int argc, char **argv)
-{
- static struct option long_options[] = {
- {"dport", required_argument, 0, 'd'},
- {"cpus", required_argument, 0, 'C'},
- {"key", required_argument, 0, 'k'},
- {"iface", required_argument, 0, 'i'},
- {"ipv4", no_argument, 0, '4'},
- {"ipv6", no_argument, 0, '6'},
- {"sink", no_argument, 0, 's'},
- {"tcp", no_argument, 0, 't'},
- {"timeout", required_argument, 0, 'T'},
- {"udp", no_argument, 0, 'u'},
- {"verbose", no_argument, 0, 'v'},
- {"rps", required_argument, 0, 'r'},
- {0, 0, 0, 0}
- };
- bool have_toeplitz = false;
- int index, c;
-
- while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
- switch (c) {
- case '4':
- cfg_family = AF_INET;
- break;
- case '6':
- cfg_family = AF_INET6;
- break;
- case 'C':
- parse_cpulist(optarg);
- break;
- case 'd':
- cfg_dport = strtol(optarg, NULL, 0);
- break;
- case 'i':
- cfg_ifname = optarg;
- break;
- case 'k':
- parse_toeplitz_key(optarg, strlen(optarg),
- toeplitz_key);
- have_toeplitz = true;
- break;
- case 'r':
- parse_rps_bitmap(optarg);
- break;
- case 's':
- cfg_sink = true;
- break;
- case 't':
- cfg_type = SOCK_STREAM;
- break;
- case 'T':
- cfg_timeout_msec = strtol(optarg, NULL, 0);
- break;
- case 'u':
- cfg_type = SOCK_DGRAM;
- break;
- case 'v':
- cfg_verbose = true;
- break;
-
- default:
- error(1, 0, "unknown option %c", optopt);
- break;
- }
- }
-
- if (!have_toeplitz)
- error(1, 0, "Must supply rss key ('-k')");
-
- num_cpus = get_nprocs();
- if (num_cpus > RSS_MAX_CPUS)
- error(1, 0, "increase RSS_MAX_CPUS");
-
- if (cfg_num_queues && cfg_num_rps_cpus)
- error(1, 0,
- "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
- if (cfg_verbose) {
- show_cpulist();
- show_silos();
- }
-}
-
-int main(int argc, char **argv)
-{
- const int min_tests = 10;
- int fd_sink = -1;
-
- parse_opts(argc, argv);
-
- if (cfg_sink)
- fd_sink = setup_sink();
-
- setup_rings();
- process_rings();
- cleanup_rings();
-
- if (cfg_sink && close(fd_sink))
- error(1, errno, "close sink");
-
- if (frames_received - frames_nohash < min_tests)
- error(1, 0, "too few frames for verification");
-
- return frames_error;
-}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
deleted file mode 100755
index 8ff172f7bb1b..000000000000
--- a/tools/testing/selftests/net/toeplitz.sh
+++ /dev/null
@@ -1,199 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
-# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
-# ('-rps <rps_map>')
-#
-# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
-# which is a driver-specific encoding.
-#
-# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
-# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
-
-source setup_loopback.sh
-readonly SERVER_IP4="192.168.1.200/24"
-readonly SERVER_IP6="fda8::1/64"
-readonly SERVER_MAC="aa:00:00:00:00:02"
-
-readonly CLIENT_IP4="192.168.1.100/24"
-readonly CLIENT_IP6="fda8::2/64"
-readonly CLIENT_MAC="aa:00:00:00:00:01"
-
-PORT=8000
-KEY="$(</proc/sys/net/core/netdev_rss_key)"
-TEST_RSS=false
-RPS_MAP=""
-PROTO_FLAG=""
-IP_FLAG=""
-DEV="eth0"
-
-# Return the number of rxqs among which RSS is configured to spread packets.
-# This is determined by reading the RSS indirection table using ethtool.
-get_rss_cfg_num_rxqs() {
- echo $(ethtool -x "${DEV}" |
- grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
- cut -d: -f2- |
- awk '{$1=$1};1' |
- tr ' ' '\n' |
- sort -u |
- wc -l)
-}
-
-# Return a list of the receive irq handler cpus.
-# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
-# Reads /sys/kernel/irq/ in order, so algorithm depends on
-# irq_{rxq-0} < irq_{rxq-1}, etc.
-get_rx_irq_cpus() {
- CPUS=""
- # sort so that irq 2 is read before irq 10
- SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
- # Consider only as many queues as RSS actually uses. We assume that
- # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
- RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
- RXQ_COUNT=0
-
- for i in ${SORTED_IRQS}
- do
- [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
- # lookup relevant IRQs by action name
- [[ -e "$i/actions" ]] || continue
- cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
- irqname=$(<"$i/actions")
-
- # does the IRQ get called
- irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
- [[ -n "${irqcount}" ]] || continue
-
- # lookup CPU
- irq=$(basename "$i")
- cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
-
- if [[ -z "${CPUS}" ]]; then
- CPUS="${cpu}"
- else
- CPUS="${CPUS},${cpu}"
- fi
- RXQ_COUNT=$((RXQ_COUNT+1))
- done
-
- echo "${CPUS}"
-}
-
-get_disable_rfs_cmd() {
- echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
-}
-
-get_set_rps_bitmaps_cmd() {
- CMD=""
- for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
- do
- CMD="${CMD} echo $1 > ${i};"
- done
-
- echo "${CMD}"
-}
-
-get_disable_rps_cmd() {
- echo "$(get_set_rps_bitmaps_cmd 0)"
-}
-
-die() {
- echo "$1"
- exit 1
-}
-
-check_nic_rxhash_enabled() {
- local -r pattern="receive-hashing:\ on"
-
- ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
-}
-
-parse_opts() {
- local prog=$0
- shift 1
-
- while [[ "$1" =~ "-" ]]; do
- if [[ "$1" = "-irq_prefix" ]]; then
- shift
- IRQ_PATTERN="^$1-[0-9]*$"
- elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
- PROTO_FLAG="$1"
- elif [[ "$1" = "-4" ]]; then
- IP_FLAG="$1"
- SERVER_IP="${SERVER_IP4}"
- CLIENT_IP="${CLIENT_IP4}"
- elif [[ "$1" = "-6" ]]; then
- IP_FLAG="$1"
- SERVER_IP="${SERVER_IP6}"
- CLIENT_IP="${CLIENT_IP6}"
- elif [[ "$1" = "-rss" ]]; then
- TEST_RSS=true
- elif [[ "$1" = "-rps" ]]; then
- shift
- RPS_MAP="$1"
- elif [[ "$1" = "-i" ]]; then
- shift
- DEV="$1"
- else
- die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
- [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
- fi
- shift
- done
-}
-
-setup() {
- setup_loopback_environment "${DEV}"
-
- # Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${DEV}" $server_ns server \
- "${SERVER_MAC}" "${SERVER_IP}"
- setup_macvlan_ns "${DEV}" $client_ns client \
- "${CLIENT_MAC}" "${CLIENT_IP}"
-}
-
-cleanup() {
- cleanup_macvlan_ns $server_ns server $client_ns client
- cleanup_loopback "${DEV}"
-}
-
-parse_opts $0 $@
-
-setup
-trap cleanup EXIT
-
-check_nic_rxhash_enabled
-
-# Actual test starts here
-if [[ "${TEST_RSS}" = true ]]; then
- # RPS/RFS must be disabled because they move packets between cpus,
- # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
- eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
- -C "$(get_rx_irq_cpus)" -s -v &
-elif [[ ! -z "${RPS_MAP}" ]]; then
- eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
- -r "0x${RPS_MAP}" -s -v &
-else
- ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
- -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
-fi
-
-server_pid=$!
-
-ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
- "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
-
-client_pid=$!
-
-wait "${server_pid}"
-exit_code=$?
-kill -9 "${client_pid}"
-if [[ "${exit_code}" -eq 0 ]]; then
- echo "Test Succeeded!"
-fi
-exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
deleted file mode 100755
index 2fef34f4aba1..000000000000
--- a/tools/testing/selftests/net/toeplitz_client.sh
+++ /dev/null
@@ -1,28 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# A simple program for generating traffic for the toeplitz test.
-#
-# This program sends packets periodically for, conservatively, 20 seconds. The
-# intent is for the calling program to kill this program once it is no longer
-# needed, rather than waiting for the 20 second expiration.
-
-send_traffic() {
- expiration=$((SECONDS+20))
- while [[ "${SECONDS}" -lt "${expiration}" ]]
- do
- if [[ "${PROTO}" == "-u" ]]; then
- echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
- else
- echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
- fi
- sleep 0.001
- done
-}
-
-PROTO=$1
-IPVER=$2
-ADDR=$3
-PORT=$4
-
-send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index 282f14760940..a7c6ab8a0347 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no
################################################################################
#
-log_test()
-{
- local rc=$1
- local expected=$2
- local msg="$3"
-
- if [ ${rc} -eq ${expected} ]; then
- printf "TEST: %-60s [ OK ]\n" "${msg}"
- nsuccess=$((nsuccess+1))
- else
- ret=1
- nfail=$((nfail+1))
- printf "TEST: %-60s [FAIL]\n" "${msg}"
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo
- echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
- fi
- fi
-}
-
run_cmd()
{
local ns
@@ -58,6 +36,35 @@ run_cmd()
return $rc
}
+__check_traceroute_version()
+{
+ local cmd=$1; shift
+ local req_ver=$1; shift
+ local ver
+
+ req_ver=$(echo "$req_ver" | sed 's/\.//g')
+ ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g')
+ if [[ $ver -lt $req_ver ]]; then
+ return 1
+ else
+ return 0
+ fi
+}
+
+check_traceroute6_version()
+{
+ local req_ver=$1; shift
+
+ __check_traceroute_version traceroute6 "$req_ver"
+}
+
+check_traceroute_version()
+{
+ local req_ver=$1; shift
+
+ __check_traceroute_version traceroute "$req_ver"
+}
+
################################################################################
# create namespaces and interconnects
@@ -81,6 +88,8 @@ create_ns()
ip netns exec ${ns} ip -6 ro add unreachable default metric 8192
ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0
+ ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
@@ -203,34 +212,275 @@ setup_traceroute6()
run_traceroute6()
{
- if [ ! -x "$(command -v traceroute6)" ]; then
- echo "SKIP: Could not run IPV6 test without traceroute6"
- return
- fi
-
setup_traceroute6
+ RET=0
+
# traceroute6 host-2 from host-1 (expects 2000:102::2)
run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
- log_test $? 0 "IPV6 traceroute"
+ check_err $? "traceroute6 did not return 2000:102::2"
+ log_test "IPv6 traceroute"
cleanup_traceroute6
}
################################################################################
+# traceroute6 with VRF test
+#
+# Verify that in this scenario
+#
+# ------------------------ N2
+# | |
+# ------ ------ N3 ----
+# | R1 | | R2 |------|H2|
+# ------ ------ ----
+# | |
+# ------------------------ N1
+# |
+# ----
+# |H1|
+# ----
+#
+# Where H1's default route goes through R1 and R1's default route goes through
+# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1.
+# The interfaces connecting R2 to the different subnets are membmer in a VRF
+# and the intention is to check that traceroute6 does not report the VRF's
+# address.
+#
+# Addresses are assigned as follows:
+#
+# N1: 2000:101::/64
+# N2: 2000:102::/64
+# N3: 2000:103::/64
+#
+# R1's host part of address: 1
+# R2's host part of address: 2
+# H1's host part of address: 3
+# H2's host part of address: 4
+#
+# For example:
+# the IPv6 address of R1's interface on N2 is 2000:102::1/64
+
+cleanup_traceroute6_vrf()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute6_vrf()
+{
+ # Start clean
+ cleanup_traceroute6_vrf
+
+ setup_ns h1 h2 r1 r2
+ create_ns "$h1"
+ create_ns "$h2"
+ create_ns "$r1"
+ create_ns "$r2"
+
+ ip -n "$r2" link add name vrf100 up type vrf table 100
+ ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100
+
+ # Setup N3
+ connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64
+
+ ip -n "$r2" link set dev eth3 master vrf100
+
+ ip -n "$h2" route add default via 2000:103::2
+
+ # Setup N2
+ connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64
+
+ ip -n "$r1" route add default via 2000:102::2
+
+ ip -n "$r2" link set dev eth2 master vrf100
+
+ # Setup N1. host-1 and router-2 connect to a bridge in router-1.
+ ip -n "$r1" link add name br100 up type bridge
+ ip -n "$r1" addr add 2000:101::1/64 dev br100
+
+ connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - -
+
+ ip -n "$h1" route add default via 2000:101::1
+
+ ip -n "$r1" link set dev eth0 master br100
+
+ connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - -
+
+ ip -n "$r2" link set dev eth1 master vrf100
+
+ ip -n "$r1" link set dev eth1 master br100
+
+ # Prime the network
+ ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1
+}
+
+run_traceroute6_vrf()
+{
+ setup_traceroute6_vrf
+
+ RET=0
+
+ # traceroute6 host-2 from host-1 (expects 2000:102::2)
+ run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2"
+ check_err $? "traceroute6 did not return 2000:102::2"
+ log_test "IPv6 traceroute with VRF"
+
+ cleanup_traceroute6_vrf
+}
+
+################################################################################
+# traceroute6 with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ---- ---- ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute6_ext()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute6_ext()
+{
+ # Start clean
+ cleanup_traceroute6_ext
+
+ setup_ns h1 r1 h2
+ create_ns "$h1"
+ create_ns "$r1"
+ create_ns "$h2"
+
+ # Setup N1
+ connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+ # Setup N2
+ connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+ # Setup H1
+ ip -n "$h1" address add 2001:db8:1::1/128 dev lo
+ ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1
+
+ # Setup R1
+ ip -n "$r1" address add 2001:db8:1::2/128 dev lo
+ ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1
+ ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2
+
+ # Setup H2
+ ip -n "$h2" address add 2001:db8:1::3/128 dev lo
+ ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2
+
+ # Prime the network
+ ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1
+}
+
+traceroute6_ext_iio_iif_test()
+{
+ local r1_ifindex h2_ifindex
+ local pkt_len=$1; shift
+
+ # Test that incoming interface info is not appended by default.
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended by default when should not"
+
+ # Test that the extension is appended when enabled.
+ run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_err $? "Failed to enable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_err $? "Incoming interface info not appended after enable"
+
+ # Test that the extension is not appended when disabled.
+ run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_err $? "Failed to disable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended after disable"
+
+ # Test that the extension is sent correctly from both R1 and H2.
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+ r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1"
+
+ run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01"
+ h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from H2"
+
+ # Add a global address on the incoming interface of R1 and check that
+ # it is reported.
+ run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad"
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1 after address addition"
+ run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1"
+
+ # Change name and MTU and make sure the result is still correct.
+ run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+ run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+ check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+ run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+ run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+ run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00"
+}
+
+run_traceroute6_ext()
+{
+ # Need at least version 2.1.5 for RFC 5837 support.
+ if ! check_traceroute6_version 2.1.5; then
+ log_test_skip "traceroute6 too old, missing ICMP extensions support"
+ return
+ fi
+
+ setup_traceroute6_ext
+
+ RET=0
+
+ ## General ICMP extensions tests
+
+ # Test that ICMP extensions are disabled by default.
+ run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\""
+ check_err $? "ICMP extensions are not disabled by default"
+
+ # Test that unsupported values are rejected. Do not use "sysctl" as
+ # older versions do not return an error code upon failure.
+ run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\""
+ check_fail $? "Unsupported sysctl value was not rejected"
+
+ ## Extension-specific tests
+
+ # Incoming interface info test. Test with various packet sizes,
+ # including the default one.
+ traceroute6_ext_iio_iif_test
+ traceroute6_ext_iio_iif_test 127
+ traceroute6_ext_iio_iif_test 128
+ traceroute6_ext_iio_iif_test 129
+
+ log_test "IPv6 traceroute with ICMP extensions"
+
+ cleanup_traceroute6_ext
+}
+
+################################################################################
# traceroute test
#
-# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively.
#
-# 1.0.3.1/24
+# 1.0.3.3/24 1.0.3.1/24
# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
# |H1|--------------------------|R1|--------------------------|H2|
# ---- N1 ---- N2 ----
#
-# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and
-# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary
-# address on N1.
-#
+# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and
+# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer
+# a source address that is on the same subnet as the destination IP of the ICMP
+# error message.
cleanup_traceroute()
{
@@ -250,6 +500,7 @@ setup_traceroute()
connect_ns $h1 eth0 1.0.1.3/24 - \
$router eth1 1.0.3.1/24 -
+ ip -n "$h1" addr add 1.0.3.3/24 dev eth0
ip netns exec $h1 ip route add default via 1.0.1.1
ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
@@ -268,18 +519,232 @@ setup_traceroute()
run_traceroute()
{
- if [ ! -x "$(command -v traceroute)" ]; then
- echo "SKIP: Could not run IPV4 test without traceroute"
+ setup_traceroute
+
+ RET=0
+
+ # traceroute host-2 from host-1. Expect a source IP that is on the same
+ # subnet as destination IP of the ICMP error message.
+ run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1"
+ check_err $? "traceroute did not return 1.0.1.1"
+ run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1"
+ check_err $? "traceroute did not return 1.0.3.1"
+ log_test "IPv4 traceroute"
+
+ cleanup_traceroute
+}
+
+################################################################################
+# traceroute with VRF test
+#
+# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when
+# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The
+# intention is to check that the kernel does not choose an IP assigned to the
+# VRF device, but rather an address from the VRF port (eth1) that received the
+# packet that generates the ICMP error message.
+#
+# 1.0.4.1/24 (vrf100)
+# 1.0.3.3/24 1.0.3.1/24
+# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+
+cleanup_traceroute_vrf()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute_vrf()
+{
+ # Start clean
+ cleanup_traceroute_vrf
+
+ setup_ns h1 h2 router
+ create_ns "$h1"
+ create_ns "$h2"
+ create_ns "$router"
+
+ ip -n "$router" link add name vrf100 up type vrf table 100
+ ip -n "$router" addr add 1.0.4.1/24 dev vrf100
+
+ connect_ns "$h1" eth0 1.0.1.3/24 - \
+ "$router" eth1 1.0.1.1/24 -
+
+ ip -n "$h1" addr add 1.0.3.3/24 dev eth0
+ ip -n "$h1" route add default via 1.0.1.1
+
+ ip -n "$router" link set dev eth1 master vrf100
+ ip -n "$router" addr add 1.0.3.1/24 dev eth1
+ ip netns exec "$router" sysctl -qw \
+ net.ipv4.icmp_errors_use_inbound_ifaddr=1
+
+ connect_ns "$h2" eth0 1.0.2.4/24 - \
+ "$router" eth2 1.0.2.1/24 -
+
+ ip -n "$h2" route add default via 1.0.2.1
+
+ ip -n "$router" link set dev eth2 master vrf100
+
+ # Prime the network
+ ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1
+}
+
+run_traceroute_vrf()
+{
+ setup_traceroute_vrf
+
+ RET=0
+
+ # traceroute host-2 from host-1. Expect a source IP that is on the same
+ # subnet as destination IP of the ICMP error message.
+ run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1"
+ check_err $? "traceroute did not return 1.0.1.1"
+ run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1"
+ check_err $? "traceroute did not return 1.0.3.1"
+ log_test "IPv4 traceroute with VRF"
+
+ cleanup_traceroute_vrf
+}
+
+################################################################################
+# traceroute with ICMP extensions test
+#
+# Verify that in this scenario
+#
+# ---- ---- ----
+# |H1|--------------------------|R1|--------------------------|H2|
+# ---- N1 ---- N2 ----
+#
+# ICMP extensions are correctly reported. The loopback interfaces on all the
+# nodes are assigned global addresses and the interfaces connecting the nodes
+# are assigned IPv6 link-local addresses.
+
+cleanup_traceroute_ext()
+{
+ cleanup_all_ns
+}
+
+setup_traceroute_ext()
+{
+ # Start clean
+ cleanup_traceroute_ext
+
+ setup_ns h1 r1 h2
+ create_ns "$h1"
+ create_ns "$r1"
+ create_ns "$h2"
+
+ # Setup N1
+ connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64
+ # Setup N2
+ connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64
+
+ # Setup H1
+ ip -n "$h1" address add 192.0.2.1/32 dev lo
+ ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1
+
+ # Setup R1
+ ip -n "$r1" address add 192.0.2.2/32 dev lo
+ ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1
+ ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2
+
+ # Setup H2
+ ip -n "$h2" address add 192.0.2.3/32 dev lo
+ ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2
+
+ # Prime the network
+ ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1
+}
+
+traceroute_ext_iio_iif_test()
+{
+ local r1_ifindex h2_ifindex
+ local pkt_len=$1; shift
+
+ # Test that incoming interface info is not appended by default.
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended by default when should not"
+
+ # Test that the extension is appended when enabled.
+ run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_err $? "Failed to enable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_err $? "Incoming interface info not appended after enable"
+
+ # Test that the extension is not appended when disabled.
+ run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_err $? "Failed to disable incoming interface info extension on R1"
+
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC"
+ check_fail $? "Incoming interface info appended after disable"
+
+ # Test that the extension is sent correctly from both R1 and H2.
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+ r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1"
+
+ run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01"
+ h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]')
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from H2"
+
+ # Add a global address on the incoming interface of R1 and check that
+ # it is reported.
+ run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1"
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'"
+ check_err $? "Wrong incoming interface info reported from R1 after address addition"
+ run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1"
+
+ # Change name and MTU and make sure the result is still correct.
+ # Re-add the route towards H1 since it was deleted when we removed the
+ # last IPv4 address from eth1 on R1.
+ run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1"
+ run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501"
+ run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'"
+ check_err $? "Wrong incoming interface info reported from R1 after name and MTU change"
+ run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500"
+
+ run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+ run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00"
+}
+
+run_traceroute_ext()
+{
+ # Need at least version 2.1.5 for RFC 5837 support.
+ if ! check_traceroute_version 2.1.5; then
+ log_test_skip "traceroute too old, missing ICMP extensions support"
return
fi
- setup_traceroute
+ setup_traceroute_ext
- # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
- run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
- log_test $? 0 "IPV4 traceroute"
+ RET=0
- cleanup_traceroute
+ ## General ICMP extensions tests
+
+ # Test that ICMP extensions are disabled by default.
+ run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\""
+ check_err $? "ICMP extensions are not disabled by default"
+
+ # Test that unsupported values are rejected. Do not use "sysctl" as
+ # older versions do not return an error code upon failure.
+ run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\""
+ check_fail $? "Unsupported sysctl value was not rejected"
+
+ ## Extension-specific tests
+
+ # Incoming interface info test. Test with various packet sizes,
+ # including the default one.
+ traceroute_ext_iio_iif_test
+ traceroute_ext_iio_iif_test 127
+ traceroute_ext_iio_iif_test 128
+ traceroute_ext_iio_iif_test 129
+
+ log_test "IPv4 traceroute with ICMP extensions"
+
+ cleanup_traceroute_ext
}
################################################################################
@@ -288,15 +753,16 @@ run_traceroute()
run_tests()
{
run_traceroute6
+ run_traceroute6_vrf
+ run_traceroute6_ext
run_traceroute
+ run_traceroute_vrf
+ run_traceroute_ext
}
################################################################################
# main
-declare -i nfail=0
-declare -i nsuccess=0
-
while getopts :pv o
do
case $o in
@@ -306,7 +772,10 @@ do
esac
done
+require_command traceroute6
+require_command traceroute
+require_command jq
+
run_tests
-printf "\nTests passed: %3d\n" ${nsuccess}
-printf "Tests failed: %3d\n" ${nfail}
+exit "${EXIT_STATUS}"
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
index fa83918b62d1..0efc67b0357a 100644
--- a/tools/testing/selftests/net/tun.c
+++ b/tools/testing/selftests/net/tun.c
@@ -15,7 +15,7 @@
#include <sys/ioctl.h>
#include <sys/socket.h>
-#include "../kselftest_harness.h"
+#include "kselftest_harness.h"
static int tun_attach(int fd, char *dev)
{
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index dae91eb97d69..bcc14688661d 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -217,7 +217,7 @@ static void print_timestamp_usr(void)
static void print_timestamp(struct scm_timestamping *tss, int tstype,
int tskey, int payload_len)
{
- const char *tsname;
+ const char *tsname = NULL;
validate_key(tskey, tstype);
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index d5ffd8c9172e..b17e032a6d75 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,7 +3,7 @@
#
# Run a series of udpgro functional tests.
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
@@ -48,7 +48,7 @@ run_one() {
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} &
local PID1=$!
wait_local_port_listen ${PEER_NS} 8000 udp
@@ -95,7 +95,7 @@ run_one_nat() {
# will land on the 'plain' one
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 &
local PID1=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} &
local PID2=$!
wait_local_port_listen "${PEER_NS}" 8000 udp
@@ -117,9 +117,9 @@ run_one_2sock() {
cfg_veth
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 &
local PID1=$!
- ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} &
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} &
local PID2=$!
wait_local_port_listen "${PEER_NS}" 12345 udp
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index c51ea90a1395..54fa4821bc5e 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,11 +3,11 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 17404f49cdb6..9a2cfec1153e 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,11 +3,11 @@
#
# Run a series of udpgro benchmarks
-source net_helper.sh
+source lib.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 550d8eb3e224..a39fdc4aa2ff 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,9 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source net_helper.sh
+source lib.sh
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 3f2fca02fec5..36ff28af4b19 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -103,6 +103,19 @@ struct testcase testcases_v4[] = {
.r_num_mss = 1,
},
{
+ /* datalen <= MSS < gso_len: will fall back to no GSO */
+ .tlen = CONST_MSS_V4,
+ .gso_len = CONST_MSS_V4 + 1,
+ .r_num_mss = 0,
+ .r_len_last = CONST_MSS_V4,
+ },
+ {
+ /* MSS < datalen < gso_len: fail */
+ .tlen = CONST_MSS_V4 + 1,
+ .gso_len = CONST_MSS_V4 + 2,
+ .tfail = true,
+ },
+ {
/* send a single MSS + 1B */
.tlen = CONST_MSS_V4 + 1,
.gso_len = CONST_MSS_V4,
@@ -206,6 +219,19 @@ struct testcase testcases_v6[] = {
.r_num_mss = 1,
},
{
+ /* datalen <= MSS < gso_len: will fall back to no GSO */
+ .tlen = CONST_MSS_V6,
+ .gso_len = CONST_MSS_V6 + 1,
+ .r_num_mss = 0,
+ .r_len_last = CONST_MSS_V6,
+ },
+ {
+ /* MSS < datalen < gso_len: fail */
+ .tlen = CONST_MSS_V6 + 1,
+ .gso_len = CONST_MSS_V6 + 2,
+ .tfail = true
+ },
+ {
/* send a single MSS + 1B */
.tlen = CONST_MSS_V6 + 1,
.gso_len = CONST_MSS_V6,
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 640bc43452fa..88fa1d53ba2b 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -92,6 +92,9 @@ run_udp() {
echo "udp"
run_in_netns ${args}
+ echo "udp sendmmsg"
+ run_in_netns ${args} -m
+
echo "udp gso"
run_in_netns ${args} -S 0
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 477392715a9a..86d80cce55b4 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -25,7 +25,7 @@
#include <sys/types.h>
#include <unistd.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#ifndef ETH_MAX_MTU
#define ETH_MAX_MTU 0xFFFFU
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 6bb7dfaa30b6..9709dd067c72 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="xdp_dummy.bpf.o"
+BPF_FILE="lib/xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
new file mode 100755
index 000000000000..e8c02c64e03a
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -0,0 +1,258 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ALL_TESTS="
+ test_binding_on
+ test_binding_off
+ test_binding_toggle_on
+ test_binding_toggle_off
+ test_binding_toggle_on_when_upper_down
+ test_binding_toggle_off_when_upper_down
+ test_binding_toggle_on_when_lower_down
+ test_binding_toggle_off_when_lower_down
+"
+
+setup_prepare()
+{
+ local port
+
+ adf_ip_link_add br up type bridge vlan_filtering 1
+
+ for port in d1 d2 d3; do
+ adf_ip_link_add $port type veth peer name r$port
+ adf_ip_link_set_up $port
+ adf_ip_link_set_up r$port
+ adf_ip_link_set_master $port br
+ done
+
+ adf_bridge_vlan_add vid 11 dev br self
+ adf_bridge_vlan_add vid 11 dev d1 master
+
+ adf_bridge_vlan_add vid 12 dev br self
+ adf_bridge_vlan_add vid 12 dev d2 master
+
+ adf_bridge_vlan_add vid 13 dev br self
+ adf_bridge_vlan_add vid 13 dev d1 master
+ adf_bridge_vlan_add vid 13 dev d2 master
+
+ adf_bridge_vlan_add vid 14 dev br self
+ adf_bridge_vlan_add vid 14 dev d1 master
+ adf_bridge_vlan_add vid 14 dev d2 master
+ adf_bridge_vlan_add vid 14 dev d3 master
+}
+
+operstate_is()
+{
+ local dev=$1; shift
+ local expect=$1; shift
+
+ local operstate=$(ip -j link show $dev | jq -r .[].operstate)
+ if [[ $operstate == UP ]]; then
+ operstate=1
+ elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then
+ operstate=0
+ fi
+ echo -n $operstate
+ [[ $operstate == $expect ]]
+}
+
+check_operstate()
+{
+ local dev=$1; shift
+ local expect=$1; shift
+ local operstate
+
+ operstate=$(busywait 1000 \
+ operstate_is "$dev" "$expect")
+ check_err $? "Got operstate of $operstate, expected $expect"
+}
+
+add_one_vlan()
+{
+ local link=$1; shift
+ local id=$1; shift
+
+ adf_ip_link_add $link.$id link $link type vlan id $id "$@"
+}
+
+add_vlans()
+{
+ add_one_vlan br 11 "$@"
+ add_one_vlan br 12 "$@"
+ add_one_vlan br 13 "$@"
+ add_one_vlan br 14 "$@"
+}
+
+set_vlans()
+{
+ ip link set dev br.11 "$@"
+ ip link set dev br.12 "$@"
+ ip link set dev br.13 "$@"
+ ip link set dev br.14 "$@"
+}
+
+down_netdevs()
+{
+ local dev
+
+ for dev in "$@"; do
+ adf_ip_link_set_down $dev
+ done
+}
+
+check_operstates()
+{
+ local opst_11=$1; shift
+ local opst_12=$1; shift
+ local opst_13=$1; shift
+ local opst_14=$1; shift
+
+ check_operstate br.11 $opst_11
+ check_operstate br.12 $opst_12
+ check_operstate br.13 $opst_13
+ check_operstate br.14 $opst_14
+}
+
+do_test_binding()
+{
+ local inject=$1; shift
+ local what=$1; shift
+ local opsts_d1=$1; shift
+ local opsts_d2=$1; shift
+ local opsts_d12=$1; shift
+ local opsts_d123=$1; shift
+
+ RET=0
+
+ defer_scope_push
+ down_netdevs d1
+ $inject
+ check_operstates $opsts_d1
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d2
+ $inject
+ check_operstates $opsts_d2
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d1 d2
+ $inject
+ check_operstates $opsts_d12
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d1 d2 d3
+ $inject
+ check_operstates $opsts_d123
+ defer_scope_pop
+
+ log_test "Test bridge_binding $what"
+}
+
+do_test_binding_on()
+{
+ local inject=$1; shift
+ local what=$1; shift
+
+ do_test_binding "$inject" "$what" \
+ "0 1 1 1" \
+ "1 0 1 1" \
+ "0 0 0 1" \
+ "0 0 0 0"
+}
+
+do_test_binding_off()
+{
+ local inject=$1; shift
+ local what=$1; shift
+
+ do_test_binding "$inject" "$what" \
+ "1 1 1 1" \
+ "1 1 1 1" \
+ "1 1 1 1" \
+ "0 0 0 0"
+}
+
+test_binding_on()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ do_test_binding_on : "on"
+}
+
+test_binding_off()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ do_test_binding_off : "off"
+}
+
+test_binding_toggle_on()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ set_vlans type vlan bridge_binding on
+ do_test_binding_on : "off->on"
+}
+
+test_binding_toggle_off()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ set_vlans type vlan bridge_binding off
+ do_test_binding_off : "on->off"
+}
+
+adf_set_binding_on()
+{
+ set_vlans type vlan bridge_binding on
+ defer set_vlans type vlan bridge_binding off
+}
+
+adf_set_binding_off()
+{
+ set_vlans type vlan bridge_binding off
+ defer set_vlans type vlan bridge_binding on
+}
+
+test_binding_toggle_on_when_lower_down()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ do_test_binding_on adf_set_binding_on "off->on when lower down"
+}
+
+test_binding_toggle_off_when_lower_down()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ do_test_binding_off adf_set_binding_off "on->off when lower down"
+}
+
+test_binding_toggle_on_when_upper_down()
+{
+ add_vlans bridge_binding off
+ set_vlans type vlan bridge_binding on
+ set_vlans up
+ do_test_binding_on : "off->on when upper down"
+}
+
+test_binding_toggle_off_when_upper_down()
+{
+ add_vlans bridge_binding on
+ set_vlans type vlan bridge_binding off
+ set_vlans up
+ do_test_binding_off : "on->off when upper down"
+}
+
+require_command jq
+
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
index 7bc804ffaf7c..e195d5cab6f7 100755
--- a/tools/testing/selftests/net/vlan_hw_filter.sh
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -3,27 +3,101 @@
readonly NETNS="ns-$(mktemp -u XXXXXX)"
+ALL_TESTS="
+ test_vlan_filter_check
+ test_vlan0_del_crash_01
+ test_vlan0_del_crash_02
+ test_vlan0_del_crash_03
+ test_vid0_memleak
+"
+
ret=0
+setup() {
+ ip netns add ${NETNS}
+}
+
cleanup() {
- ip netns del $NETNS
+ ip netns del $NETNS 2>/dev/null
}
trap cleanup EXIT
fail() {
- echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
- ret=1
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ ret=1
+}
+
+tests_run()
+{
+ local current_test
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ done
+}
+
+test_vlan_filter_check() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+ cleanup
}
-ip netns add ${NETNS}
-ip netns exec ${NETNS} ip link add bond0 type bond mode 0
-ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
-ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
-ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
-ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
-ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
-ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
-ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+#enable vlan_filter feature of real_dev with vlan0 during running time
+test_vlan0_del_crash_01() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link set dev bond0 down
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+#enable vlan_filter feature and add vlan0 for real_dev during running time
+test_vlan0_del_crash_02() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ip link set dev bond0 down
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+#enable vlan_filter feature of real_dev during running time
+#test kernel_bug of vlan unregister
+test_vlan0_del_crash_03() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link set dev bond0 up
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on
+ ip netns exec ${NETNS} ip link set dev bond0 down
+ ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+
+test_vid0_memleak() {
+ setup
+ ip netns exec ${NETNS} ip link add bond0 up type bond mode 0
+ ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function"
+ cleanup
+}
+tests_run
exit $ret
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index e9c2f71da207..ce34cb2e6e0b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -275,7 +275,7 @@ setup_sym()
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
setup_asym()
@@ -370,7 +370,7 @@ setup_asym()
ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
- sleep 2
+ slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
}
check_connectivity()
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index d43afe243779..793a2fc33d9f 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -5,10 +5,11 @@
# Inputs:
#
# YNL_GENS: families we need in the selftests
-# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet)
+# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL
# YNL_GEN_FILES: TEST_GEN_FILES which need YNL
-YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES))
+YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \
+ $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS))
YNL_SPECS := \
$(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS))
@@ -27,11 +28,13 @@ $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig:
$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a
- $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \
+ GENS="$(YNL_GENS)" RSTS="" libynl.a
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \
- $(top_srcdir)/tools/net/ynl/lib/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \
$(top_srcdir)/tools/net/ynl/lib/*.[ado] \
$(OUTPUT)/.libynl-*.sig \
$(OUTPUT)/libynl.a