diff options
Diffstat (limited to 'tools/testing/selftests/net')
409 files changed, 36227 insertions, 6673 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 666ab7d9390b..6930fe926c58 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -2,11 +2,10 @@ bind_bhash bind_timewait bind_wildcard +busy_poller cmsg_sender -diag_uid epoll_busy_poll fin_ack_lat -gro hwtstamp_config io_uring_zerocopy_tx ioam6_parser @@ -15,9 +14,12 @@ ip_local_port_range ipsec ipv6_flowlabel ipv6_flowlabel_mgr +ipv6_fragmentation log.txt msg_zerocopy +netlink-dumps nettest +proc_net_pktgen psock_fanout psock_snd psock_tpacket @@ -30,22 +32,24 @@ reuseport_bpf_numa reuseport_dualstack rxtimestamp sctp_hello -scm_pidfd -scm_rights sk_bind_sendto_listen sk_connect_zero_addr +sk_so_peek_off +skf_net_off socket so_incoming_cpu so_netns_cookie so_txtime +so_rcv_listener stress_reuseport_listen tap tcp_fastopen_backup_key tcp_inq tcp_mmap +tcp_port_share +tfo timestamping tls -toeplitz tools tun txring_overwrite @@ -53,4 +57,3 @@ txtimestamp udpgso udpgso_bench_rx udpgso_bench_tx -unix_connect diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index bc3925200637..b66ba04f19d9 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,102 +1,200 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for net selftests -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../ -TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \ - rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh -TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh ip_defrag.sh -TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh -TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh -TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh 
ipv6_flowlabel.sh -TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh -TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh -TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh -TEST_PROGS += route_localnet.sh -TEST_PROGS += reuseaddr_ports_exhausted.sh -TEST_PROGS += txtimestamp.sh -TEST_PROGS += vrf-xfrm-tests.sh -TEST_PROGS += rxtimestamp.sh -TEST_PROGS += drop_monitor_tests.sh -TEST_PROGS += vrf_route_leaking.sh -TEST_PROGS += bareudp.sh -TEST_PROGS += amt.sh -TEST_PROGS += unicast_extensions.sh -TEST_PROGS += udpgro_fwd.sh -TEST_PROGS += udpgro_frglist.sh -TEST_PROGS += veth.sh -TEST_PROGS += ioam6.sh -TEST_PROGS += gro.sh -TEST_PROGS += gre_gso.sh -TEST_PROGS += cmsg_so_mark.sh -TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh -TEST_PROGS += netns-name.sh -TEST_PROGS += nl_netdev.py -TEST_PROGS += srv6_end_dt46_l3vpn_test.sh -TEST_PROGS += srv6_end_dt4_l3vpn_test.sh -TEST_PROGS += srv6_end_dt6_l3vpn_test.sh -TEST_PROGS += srv6_hencap_red_l3vpn_test.sh -TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh -TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh -TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh -TEST_PROGS += srv6_end_flavors_test.sh -TEST_PROGS += srv6_end_dx4_netfilter_test.sh -TEST_PROGS += srv6_end_dx6_netfilter_test.sh -TEST_PROGS += vrf_strict_mode_test.sh -TEST_PROGS += arp_ndisc_evict_nocarrier.sh -TEST_PROGS += ndisc_unsolicited_na_test.sh -TEST_PROGS += arp_ndisc_untracked_subnets.sh -TEST_PROGS += stress_reuseport_listen.sh -TEST_PROGS += l2_tos_ttl_inherit.sh -TEST_PROGS += bind_bhash.sh -TEST_PROGS += ip_local_port_range.sh -TEST_PROGS += rps_default_mask.sh -TEST_PROGS += big_tcp.sh -TEST_PROGS += netns-sysctl.sh -TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh -TEST_GEN_FILES = socket nettest -TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any -TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite -TEST_GEN_FILES += udpgso 
udpgso_bench_tx udpgso_bench_rx ip_defrag -TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie -TEST_GEN_FILES += tcp_fastopen_backup_key -TEST_GEN_FILES += fin_ack_lat -TEST_GEN_FILES += reuseaddr_ports_exhausted -TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp -TEST_GEN_FILES += ipsec -TEST_GEN_FILES += ioam6_parser -TEST_GEN_FILES += gro -TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa -TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll -TEST_GEN_FILES += toeplitz -TEST_GEN_FILES += cmsg_sender -TEST_GEN_FILES += stress_reuseport_listen -TEST_PROGS += test_vxlan_vnifiltering.sh -TEST_GEN_FILES += io_uring_zerocopy_tx -TEST_PROGS += io_uring_zerocopy_tx.sh -TEST_GEN_FILES += bind_bhash -TEST_GEN_PROGS += sk_bind_sendto_listen -TEST_GEN_PROGS += sk_connect_zero_addr -TEST_PROGS += test_ingress_egress_chaining.sh -TEST_GEN_PROGS += so_incoming_cpu -TEST_PROGS += sctp_vrf.sh -TEST_GEN_FILES += sctp_hello -TEST_GEN_FILES += ip_local_port_range -TEST_GEN_FILES += bind_wildcard -TEST_PROGS += test_vxlan_mdb.sh -TEST_PROGS += test_bridge_neigh_suppress.sh -TEST_PROGS += test_vxlan_nolocalbypass.sh -TEST_PROGS += test_bridge_backup_port.sh -TEST_PROGS += fdb_flush.sh -TEST_PROGS += fq_band_pktlimit.sh -TEST_PROGS += vlan_hw_filter.sh -TEST_PROGS += bpf_offload.py +TEST_PROGS := \ + altnames.sh \ + amt.sh \ + arp_ndisc_evict_nocarrier.sh \ + arp_ndisc_untracked_subnets.sh \ + bareudp.sh \ + big_tcp.sh \ + bind_bhash.sh \ + bpf_offload.py \ + broadcast_ether_dst.sh \ + broadcast_pmtu.sh \ + busy_poll_test.sh \ + cmsg_ip.sh \ + cmsg_so_mark.sh \ + cmsg_so_priority.sh \ + cmsg_time.sh \ + drop_monitor_tests.sh \ + fcnal-ipv4.sh \ + fcnal-ipv6.sh \ + fcnal-other.sh \ + fdb_flush.sh \ + fdb_notify.sh \ + fib-onlink-tests.sh \ + fib_nexthop_multiprefix.sh \ + fib_nexthop_nongw.sh \ + fib_nexthops.sh \ + fib_rule_tests.sh \ + fib_tests.sh \ + fin_ack_lat.sh \ + 
fq_band_pktlimit.sh \ + gre_gso.sh \ + gre_ipv6_lladdr.sh \ + icmp.sh \ + icmp_redirect.sh \ + io_uring_zerocopy_tx.sh \ + ioam6.sh \ + ip6_gre_headroom.sh \ + ip_defrag.sh \ + ip_local_port_range.sh \ + ipv6_flowlabel.sh \ + ipv6_force_forwarding.sh \ + ipv6_route_update_soft_lockup.sh \ + l2_tos_ttl_inherit.sh \ + l2tp.sh \ + link_netns.py \ + lwt_dst_cache_ref_loop.sh \ + msg_zerocopy.sh \ + nat6to4.sh \ + ndisc_unsolicited_na_test.sh \ + netdev-l2addr.sh \ + netdevice.sh \ + netns-name.sh \ + netns-sysctl.sh \ + nl_netdev.py \ + pmtu.sh \ + psock_snd.sh \ + reuseaddr_ports_exhausted.sh \ + reuseport_addr_any.sh \ + route_hint.sh \ + route_localnet.sh \ + rps_default_mask.sh \ + rtnetlink.py \ + rtnetlink.sh \ + rtnetlink_notification.sh \ + run_afpackettests \ + run_netsocktests \ + rxtimestamp.sh \ + sctp_vrf.sh \ + skf_net_off.sh \ + so_txtime.sh \ + srv6_end_dt46_l3vpn_test.sh \ + srv6_end_dt4_l3vpn_test.sh \ + srv6_end_dt6_l3vpn_test.sh \ + srv6_end_dx4_netfilter_test.sh \ + srv6_end_dx6_netfilter_test.sh \ + srv6_end_flavors_test.sh \ + srv6_end_next_csid_l3vpn_test.sh \ + srv6_end_x_next_csid_l3vpn_test.sh \ + srv6_hencap_red_l3vpn_test.sh \ + srv6_hl2encap_red_l2vpn_test.sh \ + stress_reuseport_listen.sh \ + tcp_fastopen_backup_key.sh \ + test_bpf.sh \ + test_bridge_backup_port.sh \ + test_bridge_neigh_suppress.sh \ + test_ingress_egress_chaining.sh \ + test_neigh.sh \ + test_so_rcv.sh \ + test_vxlan_fdb_changelink.sh \ + test_vxlan_mdb.sh \ + test_vxlan_nh.sh \ + test_vxlan_nolocalbypass.sh \ + test_vxlan_under_vrf.sh \ + test_vxlan_vnifiltering.sh \ + tfo_passive.sh \ + traceroute.sh \ + txtimestamp.sh \ + udpgro.sh \ + udpgro_bench.sh \ + udpgro_frglist.sh \ + udpgro_fwd.sh \ + udpgso.sh \ + udpgso_bench.sh \ + unicast_extensions.sh \ + veth.sh \ + vlan_bridge_binding.sh \ + vlan_hw_filter.sh \ + vrf-xfrm-tests.sh \ + vrf_route_leaking.sh \ + vrf_strict_mode_test.sh \ + xfrm_policy.sh \ +# end of TEST_PROGS -TEST_FILES := settings -TEST_FILES += 
in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_PROGS_EXTENDED := \ + xfrm_policy_add_speed.sh \ +# end of TEST_PROGS_EXTENDED + +TEST_GEN_FILES := \ + bind_bhash \ + cmsg_sender \ + fin_ack_lat \ + hwtstamp_config \ + io_uring_zerocopy_tx \ + ioam6_parser \ + ip_defrag \ + ip_local_port_range \ + ipsec \ + ipv6_flowlabel \ + ipv6_flowlabel_mgr \ + msg_zerocopy \ + nettest \ + psock_fanout \ + psock_snd \ + psock_tpacket \ + reuseaddr_ports_exhausted \ + reuseport_addr_any \ + rxtimestamp \ + sctp_hello \ + skf_net_off \ + so_netns_cookie \ + so_rcv_listener \ + so_txtime \ + socket \ + stress_reuseport_listen \ + tcp_fastopen_backup_key \ + tcp_inq \ + tcp_mmap \ + tfo \ + timestamping \ + txring_overwrite \ + txtimestamp \ + udpgso \ + udpgso_bench_rx \ + udpgso_bench_tx \ +# end of TEST_GEN_FILES + +TEST_GEN_PROGS := \ + bind_timewait \ + bind_wildcard \ + epoll_busy_poll \ + ipv6_fragmentation \ + proc_net_pktgen \ + reuseaddr_conflict \ + reuseport_bpf \ + reuseport_bpf_cpu \ + reuseport_bpf_numa \ + reuseport_dualstack \ + sk_bind_sendto_listen \ + sk_connect_zero_addr \ + sk_so_peek_off \ + so_incoming_cpu \ + tap \ + tcp_port_share \ + tls \ + tun \ +# end of TEST_GEN_PROGS + +TEST_FILES := \ + fcnal-test.sh \ + in_netns.sh \ + lib.sh \ + settings \ +# end of TEST_FILES + +# YNL files, must be before "include ..lib.mk" +YNL_GEN_FILES := busy_poller +YNL_GEN_PROGS := netlink-dumps +TEST_GEN_FILES += $(YNL_GEN_FILES) +TEST_GEN_PROGS += $(YNL_GEN_PROGS) TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c)) @@ -104,6 +202,10 @@ TEST_INCLUDES := forwarding/lib.sh include ../lib.mk +# YNL build +YNL_GENS := netdev +include ynl.mk + $(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto diff --git a/tools/testing/selftests/net/af_unix/.gitignore b/tools/testing/selftests/net/af_unix/.gitignore new file mode 100644 index 000000000000..240b26740c9e --- 
/dev/null +++ b/tools/testing/selftests/net/af_unix/.gitignore @@ -0,0 +1,8 @@ +diag_uid +msg_oob +scm_inq +scm_pidfd +scm_rights +so_peek_off +unix_connect +unix_connreset diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index 50584479540b..3cd677b72072 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,14 @@ -CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect +CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end + +TEST_GEN_PROGS := \ + diag_uid \ + msg_oob \ + scm_inq \ + scm_pidfd \ + scm_rights \ + so_peek_off \ + unix_connect \ + unix_connreset \ +# end of TEST_GEN_PROGS include ../../lib.mk diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config index 37368567768c..b5429c15a53c 100644 --- a/tools/testing/selftests/net/af_unix/config +++ b/tools/testing/selftests/net/af_unix/config @@ -1,3 +1,3 @@ -CONFIG_UNIX=y CONFIG_AF_UNIX_OOB=y +CONFIG_UNIX=y CONFIG_UNIX_DIAG=m diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c index 79a3dd75590e..da7d50cedee6 100644 --- a/tools/testing/selftests/net/af_unix/diag_uid.c +++ b/tools/testing/selftests/net/af_unix/diag_uid.c @@ -14,7 +14,7 @@ #include <sys/types.h> #include <sys/un.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(diag_uid) { diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c index 16d0c172eaeb..1b499d56656c 100644 --- a/tools/testing/selftests/net/af_unix/msg_oob.c +++ b/tools/testing/selftests/net/af_unix/msg_oob.c @@ -11,7 +11,7 @@ #include <sys/signalfd.h> #include <sys/socket.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define BUF_SZ 32 @@ -209,8 +209,8 @@ static void __sendpair(struct __test_metadata 
*_metadata, static void __recvpair(struct __test_metadata *_metadata, FIXTURE_DATA(msg_oob) *self, - const void *expected_buf, int expected_len, - int buf_len, int flags) + const char *expected_buf, int expected_len, + int buf_len, int flags, bool is_sender) { int i, ret[2], recv_errno[2], expected_errno = 0; char recv_buf[2][BUF_SZ] = {}; @@ -221,7 +221,9 @@ static void __recvpair(struct __test_metadata *_metadata, errno = 0; for (i = 0; i < 2; i++) { - ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags); + int index = is_sender ? i * 2 : i * 2 + 1; + + ret[i] = recv(self->fd[index], recv_buf[i], buf_len, flags); recv_errno[i] = errno; } @@ -308,6 +310,20 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, ASSERT_EQ(answ[0], answ[1]); } +static void __resetpair(struct __test_metadata *_metadata, + FIXTURE_DATA(msg_oob) *self, + const FIXTURE_VARIANT(msg_oob) *variant, + bool reset) +{ + int i; + + for (i = 0; i < 2; i++) + close(self->fd[i * 2 + 1]); + + __recvpair(_metadata, self, "", reset ? -ECONNRESET : 0, 1, + variant->peek ? 
MSG_PEEK : 0, true); +} + #define sendpair(buf, len, flags) \ __sendpair(_metadata, self, buf, len, flags) @@ -316,9 +332,10 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, if (variant->peek) \ __recvpair(_metadata, self, \ expected_buf, expected_len, \ - buf_len, (flags) | MSG_PEEK); \ + buf_len, (flags) | MSG_PEEK, false); \ __recvpair(_metadata, self, \ - expected_buf, expected_len, buf_len, flags); \ + expected_buf, expected_len, \ + buf_len, flags, false); \ } while (0) #define epollpair(oob_remaining) \ @@ -330,6 +347,9 @@ static void __siocatmarkpair(struct __test_metadata *_metadata, #define setinlinepair() \ __setinlinepair(_metadata, self) +#define resetpair(reset) \ + __resetpair(_metadata, self, variant, reset) + #define tcp_incompliant \ for (self->tcp_compliant = false; \ self->tcp_compliant == false; \ @@ -344,6 +364,21 @@ TEST_F(msg_oob, non_oob) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(true); +} + +TEST_F(msg_oob, non_oob_no_reset) +{ + sendpair("x", 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("x", 1, 1, 0); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob) @@ -355,6 +390,19 @@ TEST_F(msg_oob, oob) recvpair("x", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } +} + +TEST_F(msg_oob, oob_reset) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + resetpair(true); } TEST_F(msg_oob, oob_drop) @@ -370,6 +418,8 @@ TEST_F(msg_oob, oob_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead) @@ -385,6 +435,10 @@ TEST_F(msg_oob, oob_ahead) recvpair("hell", 4, 4, 0); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. 
*/ + } } TEST_F(msg_oob, oob_break) @@ -403,6 +457,8 @@ TEST_F(msg_oob, oob_break) recvpair("", -EAGAIN, 1, 0); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_ahead_break) @@ -426,6 +482,8 @@ TEST_F(msg_oob, oob_ahead_break) recvpair("world", 5, 5, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, oob_break_drop) @@ -449,6 +507,8 @@ TEST_F(msg_oob, oob_break_drop) recvpair("", -EINVAL, 1, MSG_OOB); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_break) @@ -476,6 +536,8 @@ TEST_F(msg_oob, ex_oob_break) recvpair("ld", 2, 2, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop) @@ -498,6 +560,8 @@ TEST_F(msg_oob, ex_oob_drop) epollpair(false); siocatmarkpair(true); } + + resetpair(false); } TEST_F(msg_oob, ex_oob_drop_2) @@ -523,6 +587,79 @@ TEST_F(msg_oob, ex_oob_drop_2) epollpair(false); siocatmarkpair(true); } + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("", -EAGAIN, 1, 0); + epollpair(false); + siocatmarkpair(false); + + recvpair("", -EINVAL, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(false); + + resetpair(false); +} + +TEST_F(msg_oob, ex_oob_ex_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. 
*/ + } +} + +TEST_F(msg_oob, ex_oob_ex_oob_oob) +{ + sendpair("x", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("x", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("y", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); + + recvpair("y", 1, 1, MSG_OOB); + epollpair(false); + siocatmarkpair(true); + + sendpair("z", 1, MSG_OOB); + epollpair(true); + siocatmarkpair(true); } TEST_F(msg_oob, ex_oob_ahead_break) @@ -553,6 +690,10 @@ TEST_F(msg_oob, ex_oob_ahead_break) recvpair("d", 1, 1, MSG_OOB); epollpair(false); siocatmarkpair(true); + + tcp_incompliant { + resetpair(false); /* TCP sets -ECONNRESET for ex-OOB. */ + } } TEST_F(msg_oob, ex_oob_siocatmark) @@ -572,6 +713,8 @@ TEST_F(msg_oob, ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_F(msg_oob, inline_oob) @@ -589,6 +732,8 @@ TEST_F(msg_oob, inline_oob) recvpair("x", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_break) @@ -610,6 +755,8 @@ TEST_F(msg_oob, inline_oob_break) recvpair("o", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_oob_ahead_break) @@ -638,6 +785,8 @@ TEST_F(msg_oob, inline_oob_ahead_break) epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_break) @@ -663,6 +812,8 @@ TEST_F(msg_oob, inline_ex_oob_break) recvpair("rld", 3, 3, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_no_drop) @@ -684,6 +835,8 @@ TEST_F(msg_oob, inline_ex_oob_no_drop) recvpair("y", 1, 1, 0); epollpair(false); siocatmarkpair(false); + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_drop) @@ -708,6 +861,8 @@ TEST_F(msg_oob, inline_ex_oob_drop) epollpair(false); siocatmarkpair(false); } + + resetpair(false); } TEST_F(msg_oob, inline_ex_oob_siocatmark) @@ -729,6 +884,8 @@ 
TEST_F(msg_oob, inline_ex_oob_siocatmark) recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */ epollpair(true); siocatmarkpair(false); + + resetpair(true); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c new file mode 100644 index 000000000000..3a86be9bda17 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <linux/sockios.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "kselftest_harness.h" + +#define NR_CHUNKS 100 +#define MSG_LEN 256 + +FIXTURE(scm_inq) +{ + int fd[2]; +}; + +FIXTURE_VARIANT(scm_inq) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(scm_inq, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(scm_inq, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(scm_inq) +{ + int err; + + err = socketpair(AF_UNIX, variant->type | SOCK_NONBLOCK, 0, self->fd); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(scm_inq) +{ + close(self->fd[0]); + close(self->fd[1]); +} + +static void send_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char buf[MSG_LEN] = {}; + int i, ret; + + for (i = 0; i < NR_CHUNKS; i++) { + ret = send(self->fd[0], buf, sizeof(buf), 0); + ASSERT_EQ(sizeof(buf), ret); + } +} + +static void recv_chunks(struct __test_metadata *_metadata, + FIXTURE_DATA(scm_inq) *self) +{ + char cmsg_buf[CMSG_SPACE(sizeof(int))]; + struct msghdr msg = {}; + struct iovec iov = {}; + struct cmsghdr *cmsg; + char buf[MSG_LEN]; + int i, ret; + int inq; + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); + + iov.iov_base = buf; + iov.iov_len = sizeof(buf); + + for (i = 0; i < NR_CHUNKS; i++) { + memset(buf, 0, sizeof(buf)); + memset(cmsg_buf, 0, 
sizeof(cmsg_buf)); + + ret = recvmsg(self->fd[1], &msg, 0); + ASSERT_EQ(MSG_LEN, ret); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); + + ret = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, ret); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); + } +} + +TEST_F(scm_inq, basic) +{ + int err, inq; + + err = setsockopt(self->fd[1], SOL_SOCKET, SO_INQ, &(int){1}, sizeof(int)); + if (variant->type != SOCK_STREAM) { + ASSERT_EQ(-ENOPROTOOPT, -errno); + return; + } + + ASSERT_EQ(0, err); + + err = ioctl(self->fd[1], SIOCINQ, &inq); + ASSERT_EQ(0, err); + ASSERT_EQ(0, inq); + + send_chunks(_metadata, self); + recv_chunks(_metadata, self); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 7e534594167e..2c18b92a2603 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -15,7 +15,8 @@ #include <sys/types.h> #include <sys/wait.h> -#include "../../kselftest_harness.h" +#include "../../pidfd/pidfd.h" +#include "kselftest_harness.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) #define log_err(MSG, ...) 
\ @@ -26,6 +27,8 @@ #define SCM_PIDFD 0x04 #endif +#define CHILD_EXIT_CODE_OK 123 + static void child_die() { exit(1); @@ -126,16 +129,64 @@ out: return result; } +struct cmsg_data { + struct ucred *ucred; + int *pidfd; +}; + +static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) +{ + struct cmsghdr *cmsg; + + if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_PIDFD) { + if (cmsg->cmsg_len < sizeof(*res->pidfd)) { + log_err("CMSG parse: SCM_PIDFD wrong len"); + return 1; + } + + res->pidfd = (void *)CMSG_DATA(cmsg); + } + + if (cmsg->cmsg_level == SOL_SOCKET && + cmsg->cmsg_type == SCM_CREDENTIALS) { + if (cmsg->cmsg_len < sizeof(*res->ucred)) { + log_err("CMSG parse: SCM_CREDENTIALS wrong len"); + return 1; + } + + res->ucred = (void *)CMSG_DATA(cmsg); + } + } + + if (!res->pidfd) { + log_err("CMSG parse: SCM_PIDFD not found"); + return 1; + } + + if (!res->ucred) { + log_err("CMSG parse: SCM_CREDENTIALS not found"); + return 1; + } + + return 0; +} + static int cmsg_check(int fd) { struct msghdr msg = { 0 }; - struct cmsghdr *cmsg; + struct cmsg_data res; struct iovec iov; - struct ucred *ucred = NULL; int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - int *pidfd = NULL; pid_t parent_pid; int err; @@ -158,53 +209,98 @@ static int cmsg_check(int fd) return 1; } - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; - cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_PIDFD) { - if (cmsg->cmsg_len < sizeof(*pidfd)) { - log_err("CMSG parse: SCM_PIDFD wrong len"); - return 1; - } + /* send(pfd, "x", sizeof(char), 0) */ + if (data != 'x') { + log_err("recvmsg: data corruption"); + return 1; + } - pidfd = (void *)CMSG_DATA(cmsg); - } + if (parse_cmsg(&msg, &res)) { + 
log_err("CMSG parse: parse_cmsg() failed"); + return 1; + } - if (cmsg->cmsg_level == SOL_SOCKET && - cmsg->cmsg_type == SCM_CREDENTIALS) { - if (cmsg->cmsg_len < sizeof(*ucred)) { - log_err("CMSG parse: SCM_CREDENTIALS wrong len"); - return 1; - } + /* pidfd from SCM_PIDFD should point to the parent process PID */ + parent_pid = + get_pid_from_fdinfo_file(*res.pidfd, "Pid:", sizeof("Pid:") - 1); + if (parent_pid != getppid()) { + log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + close(*res.pidfd); + return 1; + } - ucred = (void *)CMSG_DATA(cmsg); - } + close(*res.pidfd); + return 0; +} + +static int cmsg_check_dead(int fd, int expected_pid) +{ + int err; + struct msghdr msg = { 0 }; + struct cmsg_data res; + struct iovec iov; + int data = 0; + char control[CMSG_SPACE(sizeof(struct ucred)) + + CMSG_SPACE(sizeof(int))] = { 0 }; + struct pidfd_info info = { + .mask = PIDFD_INFO_EXIT, + }; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + err = recvmsg(fd, &msg, 0); + if (err < 0) { + log_err("recvmsg"); + return 1; } - /* send(pfd, "x", sizeof(char), 0) */ - if (data != 'x') { + if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + log_err("recvmsg: truncated"); + return 1; + } + + /* send(cfd, "y", sizeof(char), 0) */ + if (data != 'y') { log_err("recvmsg: data corruption"); return 1; } - if (!pidfd) { - log_err("CMSG parse: SCM_PIDFD not found"); + if (parse_cmsg(&msg, &res)) { + log_err("CMSG parse: parse_cmsg() failed"); return 1; } - if (!ucred) { - log_err("CMSG parse: SCM_CREDENTIALS not found"); + /* + * pidfd from SCM_PIDFD should point to the client_pid. + * Let's read exit information and check if it's what + * we expect to see. 
+ */ + if (ioctl(*res.pidfd, PIDFD_GET_INFO, &info)) { + log_err("%s: ioctl(PIDFD_GET_INFO) failed", __func__); + close(*res.pidfd); return 1; } - /* pidfd from SCM_PIDFD should point to the parent process PID */ - parent_pid = - get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1); - if (parent_pid != getppid()) { - log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid()); + if (!(info.mask & PIDFD_INFO_EXIT)) { + log_err("%s: No exit information from ioctl(PIDFD_GET_INFO)", __func__); + close(*res.pidfd); + return 1; + } + + err = WIFEXITED(info.exit_code) ? WEXITSTATUS(info.exit_code) : 1; + if (err != CHILD_EXIT_CODE_OK) { + log_err("%s: wrong exit_code %d != %d", __func__, err, CHILD_EXIT_CODE_OK); + close(*res.pidfd); return 1; } + close(*res.pidfd); return 0; } @@ -291,6 +387,24 @@ static void fill_sockaddr(struct sock_addr *addr, bool abstract) memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name)); } +static int sk_enable_cred_pass(int sk) +{ + int on = 0; + + on = 1; + if (setsockopt(sk, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { + log_err("Failed to set SO_PASSCRED"); + return 1; + } + + if (setsockopt(sk, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { + log_err("Failed to set SO_PASSPIDFD"); + return 1; + } + + return 0; +} + static void client(FIXTURE_DATA(scm_pidfd) *self, const FIXTURE_VARIANT(scm_pidfd) *variant) { @@ -299,7 +413,6 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, struct ucred peer_cred; int peer_pidfd; pid_t peer_pid; - int on = 0; cfd = socket(AF_UNIX, variant->type, 0); if (cfd < 0) { @@ -322,14 +435,8 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } - on = 1; - if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) { - log_err("Failed to set SO_PASSCRED"); - child_die(); - } - - if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) { - log_err("Failed to set SO_PASSPIDFD"); + if (sk_enable_cred_pass(cfd)) { + log_err("sk_enable_cred_pass() failed"); child_die(); } @@ 
-340,6 +447,12 @@ static void client(FIXTURE_DATA(scm_pidfd) *self, child_die(); } + /* send something to the parent so it can receive SCM_PIDFD too and validate it */ + if (send(cfd, "y", sizeof(char), 0) == -1) { + log_err("Failed to send(cfd, \"y\", sizeof(char), 0)"); + child_die(); + } + /* skip further for SOCK_DGRAM as it's not applicable */ if (variant->type == SOCK_DGRAM) return; @@ -398,7 +511,13 @@ TEST_F(scm_pidfd, test) close(self->server); close(self->startup_pipe[0]); client(self, variant); - exit(0); + + /* + * It's a bit unusual, but in case of success we return non-zero + * exit code (CHILD_EXIT_CODE_OK) and then we expect to read it + * from ioctl(PIDFD_GET_INFO) in cmsg_check_dead(). + */ + exit(CHILD_EXIT_CODE_OK); } close(self->startup_pipe[1]); @@ -421,9 +540,17 @@ TEST_F(scm_pidfd, test) ASSERT_NE(-1, err); } - close(pfd); waitpid(self->client_pid, &child_status, 0); - ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1); + /* see comment before exit(CHILD_EXIT_CODE_OK) */ + ASSERT_EQ(CHILD_EXIT_CODE_OK, WIFEXITED(child_status) ? 
WEXITSTATUS(child_status) : 1); + + err = sk_enable_cred_pass(pfd); + ASSERT_EQ(0, err); + + err = cmsg_check_dead(pfd, self->client_pid); + ASSERT_EQ(0, err); + + close(pfd); } TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index d66336256580..d82a79c21c17 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -10,7 +10,7 @@ #include <sys/socket.h> #include <sys/un.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(scm_rights) { @@ -23,6 +23,7 @@ FIXTURE_VARIANT(scm_rights) int type; int flags; bool test_listener; + bool disabled; }; FIXTURE_VARIANT_ADD(scm_rights, dgram) @@ -31,6 +32,16 @@ FIXTURE_VARIANT_ADD(scm_rights, dgram) .type = SOCK_DGRAM, .flags = 0, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, dgram_disabled) +{ + .name = "UNIX ", + .type = SOCK_DGRAM, + .flags = 0, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream) @@ -39,6 +50,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream) .type = SOCK_STREAM, .flags = 0, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_oob) @@ -47,6 +68,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_oob) .type = SOCK_STREAM, .flags = MSG_OOB, .test_listener = false, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = false, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_listener) @@ -55,6 +86,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener) .type = SOCK_STREAM, .flags = 0, .test_listener = true, + .disabled = false, +}; + 
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = 0, + .test_listener = true, + .disabled = true, }; FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) @@ -63,6 +104,16 @@ FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob) .type = SOCK_STREAM, .flags = MSG_OOB, .test_listener = true, + .disabled = false, +}; + +FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob_disabled) +{ + .name = "UNIX-STREAM ", + .type = SOCK_STREAM, + .flags = MSG_OOB, + .test_listener = true, + .disabled = true, }; static int count_sockets(struct __test_metadata *_metadata, @@ -105,6 +156,9 @@ FIXTURE_SETUP(scm_rights) ret = unshare(CLONE_NEWNET); ASSERT_EQ(0, ret); + if (variant->disabled) + return; + ret = count_sockets(_metadata, variant); ASSERT_EQ(0, ret); } @@ -113,6 +167,9 @@ FIXTURE_TEARDOWN(scm_rights) { int ret; + if (variant->disabled) + return; + sleep(1); ret = count_sockets(_metadata, variant); @@ -121,6 +178,7 @@ FIXTURE_TEARDOWN(scm_rights) static void create_listeners(struct __test_metadata *_metadata, FIXTURE_DATA(scm_rights) *self, + const FIXTURE_VARIANT(scm_rights) *variant, int n) { struct sockaddr_un addr = { @@ -140,6 +198,12 @@ static void create_listeners(struct __test_metadata *_metadata, ret = listen(self->fd[i], -1); ASSERT_EQ(0, ret); + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } + addrlen = sizeof(addr); ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen); ASSERT_EQ(0, ret); @@ -164,6 +228,12 @@ static void create_socketpairs(struct __test_metadata *_metadata, for (i = 0; i < n * 2; i += 2) { ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i); ASSERT_EQ(0, ret); + + if (variant->disabled) { + ret = setsockopt(self->fd[i], SOL_SOCKET, SO_PASSRIGHTS, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); + } } } @@ -175,7 +245,7 @@ static void __create_sockets(struct __test_metadata 
*_metadata, ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0])); if (variant->test_listener) - create_listeners(_metadata, self, n); + create_listeners(_metadata, self, variant, n); else create_socketpairs(_metadata, self, variant, n); } @@ -201,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -224,13 +285,26 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); - ASSERT_EQ(MSGLEN, ret); + + if (variant->disabled) { + ASSERT_EQ(-1, ret); + ASSERT_EQ(-EPERM, -errno); + } else { + ASSERT_EQ(MSGLEN, ret); + } } #define create_sockets(n) \ diff --git a/tools/testing/selftests/net/af_unix/so_peek_off.c b/tools/testing/selftests/net/af_unix/so_peek_off.c new file mode 100644 index 000000000000..86e7b0fb522d --- /dev/null +++ b/tools/testing/selftests/net/af_unix/so_peek_off.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2025 Google LLC */ + +#include <stdlib.h> +#include <unistd.h> + +#include <sys/socket.h> + +#include "../../kselftest_harness.h" + +FIXTURE(so_peek_off) +{ + int fd[2]; /* 0: sender, 1: receiver */ 
+}; + +FIXTURE_VARIANT(so_peek_off) +{ + int type; +}; + +FIXTURE_VARIANT_ADD(so_peek_off, stream) +{ + .type = SOCK_STREAM, +}; + +FIXTURE_VARIANT_ADD(so_peek_off, dgram) +{ + .type = SOCK_DGRAM, +}; + +FIXTURE_VARIANT_ADD(so_peek_off, seqpacket) +{ + .type = SOCK_SEQPACKET, +}; + +FIXTURE_SETUP(so_peek_off) +{ + struct timeval timeout = { + .tv_sec = 5, + .tv_usec = 0, + }; + int ret; + + ret = socketpair(AF_UNIX, variant->type, 0, self->fd); + ASSERT_EQ(0, ret); + + ret = setsockopt(self->fd[1], SOL_SOCKET, SO_RCVTIMEO_NEW, + &timeout, sizeof(timeout)); + ASSERT_EQ(0, ret); + + ret = setsockopt(self->fd[1], SOL_SOCKET, SO_PEEK_OFF, + &(int){0}, sizeof(int)); + ASSERT_EQ(0, ret); +} + +FIXTURE_TEARDOWN(so_peek_off) +{ + close_range(self->fd[0], self->fd[1], 0); +} + +#define sendeq(fd, str, flags) \ + do { \ + int bytes, len = strlen(str); \ + \ + bytes = send(fd, str, len, flags); \ + ASSERT_EQ(len, bytes); \ + } while (0) + +#define recveq(fd, str, buflen, flags) \ + do { \ + char buf[(buflen) + 1] = {}; \ + int bytes; \ + \ + bytes = recv(fd, buf, buflen, flags); \ + ASSERT_NE(-1, bytes); \ + ASSERT_STREQ(str, buf); \ + } while (0) + +#define async \ + for (pid_t pid = (pid = fork(), \ + pid < 0 ? 
\ + __TH_LOG("Failed to start async {}"), \ + _metadata->exit_code = KSFT_FAIL, \ + __bail(1, _metadata), \ + 0xdead : \ + pid); \ + !pid; exit(0)) + +TEST_F(so_peek_off, single_chunk) +{ + sendeq(self->fd[0], "aaaabbbb", 0); + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_F(so_peek_off, two_chunks) +{ + sendeq(self->fd[0], "aaaa", 0); + sendeq(self->fd[0], "bbbb", 0); + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_F(so_peek_off, two_chunks_blocking) +{ + async { + usleep(1000); + sendeq(self->fd[0], "aaaa", 0); + } + + recveq(self->fd[1], "aaaa", 4, MSG_PEEK); + + async { + usleep(1000); + sendeq(self->fd[0], "bbbb", 0); + } + + /* goto again; -> goto redo; in unix_stream_read_generic(). */ + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_F(so_peek_off, two_chunks_overlap) +{ + sendeq(self->fd[0], "aaaa", 0); + recveq(self->fd[1], "aa", 2, MSG_PEEK); + + sendeq(self->fd[0], "bbbb", 0); + + if (variant->type == SOCK_STREAM) { + /* SOCK_STREAM tries to fill the buffer. */ + recveq(self->fd[1], "aabb", 4, MSG_PEEK); + recveq(self->fd[1], "bb", 100, MSG_PEEK); + } else { + /* SOCK_DGRAM and SOCK_SEQPACKET returns at the skb boundary. */ + recveq(self->fd[1], "aa", 100, MSG_PEEK); + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); + } +} + +TEST_F(so_peek_off, two_chunks_overlap_blocking) +{ + async { + usleep(1000); + sendeq(self->fd[0], "aaaa", 0); + } + + recveq(self->fd[1], "aa", 2, MSG_PEEK); + + async { + usleep(1000); + sendeq(self->fd[0], "bbbb", 0); + } + + /* Even SOCK_STREAM does not wait if at least one byte is read. 
*/ + recveq(self->fd[1], "aa", 100, MSG_PEEK); + + recveq(self->fd[1], "bbbb", 100, MSG_PEEK); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c index d799fd8f5c7c..870ca96fa8ea 100644 --- a/tools/testing/selftests/net/af_unix/unix_connect.c +++ b/tools/testing/selftests/net/af_unix/unix_connect.c @@ -10,7 +10,7 @@ #include <sys/socket.h> #include <sys/un.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(unix_connect) { diff --git a/tools/testing/selftests/net/af_unix/unix_connreset.c b/tools/testing/selftests/net/af_unix/unix_connreset.c new file mode 100644 index 000000000000..08c1de8f5a98 --- /dev/null +++ b/tools/testing/selftests/net/af_unix/unix_connreset.c @@ -0,0 +1,180 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Selftest for AF_UNIX socket close and ECONNRESET behaviour. + * + * This test verifies: + * 1. SOCK_STREAM returns EOF when the peer closes normally. + * 2. SOCK_STREAM returns ECONNRESET if peer closes with unread data. + * 3. SOCK_SEQPACKET returns EOF when the peer closes normally. + * 4. SOCK_SEQPACKET returns ECONNRESET if the peer closes with unread data. + * 5. SOCK_DGRAM does not return ECONNRESET when the peer closes. + * + * These tests document the intended Linux behaviour. 
+ * + */ + +#define _GNU_SOURCE +#include <string.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <sys/socket.h> +#include <sys/un.h> +#include "../../kselftest_harness.h" + +#define SOCK_PATH "/tmp/af_unix_connreset.sock" + +static void remove_socket_file(void) +{ + unlink(SOCK_PATH); +} + +FIXTURE(unix_sock) +{ + int server; + int client; + int child; +}; + +FIXTURE_VARIANT(unix_sock) +{ + int socket_type; + const char *name; +}; + +FIXTURE_VARIANT_ADD(unix_sock, stream) { + .socket_type = SOCK_STREAM, + .name = "SOCK_STREAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, dgram) { + .socket_type = SOCK_DGRAM, + .name = "SOCK_DGRAM", +}; + +FIXTURE_VARIANT_ADD(unix_sock, seqpacket) { + .socket_type = SOCK_SEQPACKET, + .name = "SOCK_SEQPACKET", +}; + +FIXTURE_SETUP(unix_sock) +{ + struct sockaddr_un addr = {}; + int err; + + addr.sun_family = AF_UNIX; + strcpy(addr.sun_path, SOCK_PATH); + remove_socket_file(); + + self->server = socket(AF_UNIX, variant->socket_type, 0); + ASSERT_LT(-1, self->server); + + err = bind(self->server, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + err = listen(self->server, 1); + ASSERT_EQ(0, err); + } + + self->client = socket(AF_UNIX, variant->socket_type | SOCK_NONBLOCK, 0); + ASSERT_LT(-1, self->client); + + err = connect(self->client, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(0, err); +} + +FIXTURE_TEARDOWN(unix_sock) +{ + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) + close(self->child); + + close(self->client); + close(self->server); + remove_socket_file(); +} + +/* Test 1: peer closes normally */ +TEST_F(unix_sock, eof) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + close(self->child); + } else 
{ + close(self->server); + } + + n = recv(self->client, buf, sizeof(buf), 0); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(0, n); + } else { + ASSERT_EQ(-1, n); + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 2: peer closes with unread data */ +TEST_F(unix_sock, reset_unread_behavior) +{ + char buf[16] = {}; + ssize_t n; + + /* Send data that will remain unread */ + send(self->client, "hello", 5, 0); + + if (variant->socket_type == SOCK_DGRAM) { + /* No real connection, just close the server */ + close(self->server); + } else { + self->child = accept(self->server, NULL, NULL); + ASSERT_LT(-1, self->child); + + /* Peer closes before client reads */ + close(self->child); + } + + n = recv(self->client, buf, sizeof(buf), 0); + ASSERT_EQ(-1, n); + + if (variant->socket_type == SOCK_STREAM || + variant->socket_type == SOCK_SEQPACKET) { + ASSERT_EQ(ECONNRESET, errno); + } else { + ASSERT_EQ(EAGAIN, errno); + } +} + +/* Test 3: closing unaccepted (embryo) server socket should reset client. 
*/ +TEST_F(unix_sock, reset_closed_embryo) +{ + char buf[16] = {}; + ssize_t n; + + if (variant->socket_type == SOCK_DGRAM) { + snprintf(_metadata->results->reason, + sizeof(_metadata->results->reason), + "Test only applies to SOCK_STREAM and SOCK_SEQPACKET"); + exit(KSFT_XFAIL); + } + + /* Close server without accept()ing */ + close(self->server); + + n = recv(self->client, buf, sizeof(buf), 0); + + ASSERT_EQ(-1, n); + ASSERT_EQ(ECONNRESET, errno); +} + +TEST_HARNESS_MAIN + diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh index d458b45c775b..3ef209cacb8e 100755 --- a/tools/testing/selftests/net/amt.sh +++ b/tools/testing/selftests/net/amt.sh @@ -194,15 +194,21 @@ test_remote_ip() send_mcast_torture4() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u 239.0.0.1 4001' + echo -n "." + done } send_mcast_torture6() { - ip netns exec "${SOURCE}" bash -c \ - 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001' + for i in `seq 10`; do + ip netns exec "${SOURCE}" bash -c \ + 'cat /dev/urandom | head -c 100M | nc -w 1 -u ff0e::5:6 6001' + echo -n "." + done } check_features() @@ -278,10 +284,12 @@ wait $pid || err=$? if [ $err -eq 1 ]; then ERR=1 fi +printf "TEST: %-50s" "IPv4 amt traffic forwarding torture" send_mcast_torture4 -printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture" +printf " [ OK ]\n" +printf "TEST: %-50s" "IPv6 amt traffic forwarding torture" send_mcast_torture6 -printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture" +printf " [ OK ]\n" sleep 5 if [ "${ERR}" -eq 1 ]; then echo "Some tests failed." 
>&2 diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh index 92eb880c52f2..00758f00efbf 100755 --- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh +++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh @@ -75,7 +75,7 @@ setup_v4() { ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1 if [ $? -ne 0 ]; then cleanup_v4 - echo "failed" + echo "failed; is the system using MACAddressPolicy=persistent ?" exit 1 fi diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh index f366cadbc5e8..d9e5b967f815 100755 --- a/tools/testing/selftests/net/bareudp.sh +++ b/tools/testing/selftests/net/bareudp.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash # SPDX-License-Identifier: GPL-2.0 # Test various bareudp tunnel configurations. @@ -106,26 +106,16 @@ # | | # +-----------------------------------------------------------------------+ +. ./lib.sh + ERR=4 # Return 4 by default, which is the SKIP code for kselftest PING6="ping" PAUSE_ON_FAIL="no" -readonly NS0=$(mktemp -u ns0-XXXXXXXX) -readonly NS1=$(mktemp -u ns1-XXXXXXXX) -readonly NS2=$(mktemp -u ns2-XXXXXXXX) -readonly NS3=$(mktemp -u ns3-XXXXXXXX) - # Exit the script after having removed the network namespaces it created -# -# Parameters: -# -# * The list of network namespaces to delete before exiting. -# exit_cleanup() { - for ns in "$@"; do - ip netns delete "${ns}" 2>/dev/null || true - done + cleanup_all_ns if [ "${ERR}" -eq 4 ]; then echo "Error: Setting up the testing environment failed." >&2 @@ -140,17 +130,7 @@ exit_cleanup() # namespaces created by this script are deleted. 
create_namespaces() { - ip netns add "${NS0}" || exit_cleanup - ip netns add "${NS1}" || exit_cleanup "${NS0}" - ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}" - ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}" -} - -# The trap function handler -# -exit_cleanup_all() -{ - exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}" + setup_ns NS0 NS1 NS2 NS3 || exit_cleanup } # Configure a network interface using a host route @@ -188,10 +168,6 @@ iface_config() # setup_underlay() { - for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do - ip -netns "${ns}" link set dev lo up - done; - ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}" ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}" ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}" @@ -234,14 +210,6 @@ setup_overlay_ipv4() ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1 ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10 ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33 - - # The intermediate namespaces don't have routes for the reverse path, - # as it will be handled by tc. So we need to ensure that rp_filter is - # not going to block the traffic. - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0 - ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0 } setup_overlay_ipv6() @@ -521,13 +489,10 @@ done check_features -# Create namespaces before setting up the exit trap. -# Otherwise, exit_cleanup_all() could delete namespaces that were not created -# by this script. 
-create_namespaces - set -e -trap exit_cleanup_all EXIT +trap exit_cleanup EXIT + +create_namespaces setup_underlay setup_overlay_ipv4 diff --git a/tools/testing/selftests/net/bench/Makefile b/tools/testing/selftests/net/bench/Makefile new file mode 100644 index 000000000000..2546c45e42f7 --- /dev/null +++ b/tools/testing/selftests/net/bench/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_GEN_MODS_DIR := page_pool + +TEST_PROGS += test_bench_page_pool.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/net/bench/page_pool/Makefile b/tools/testing/selftests/net/bench/page_pool/Makefile new file mode 100644 index 000000000000..0549a16ba275 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/Makefile @@ -0,0 +1,17 @@ +BENCH_PAGE_POOL_SIMPLE_TEST_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST))))) +KDIR ?= /lib/modules/$(shell uname -r)/build + +ifeq ($(V),1) +Q = +else +Q = @ +endif + +obj-m += bench_page_pool.o +bench_page_pool-y += bench_page_pool_simple.o time_bench.o + +all: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) modules + +clean: + +$(Q)make -C $(KDIR) M=$(BENCH_PAGE_POOL_SIMPLE_TEST_DIR) clean diff --git a/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c new file mode 100644 index 000000000000..cb6468adbda4 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/bench_page_pool_simple.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmark module for page_pool. + * + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/interrupt.h> +#include <linux/limits.h> +#include <linux/module.h> +#include <linux/mutex.h> +#include <net/page_pool/helpers.h> + +#include "time_bench.h" + +static int verbose = 1; +#define MY_POOL_SIZE 1024 + +/* Makes tests selectable. Useful for perf-record to analyze a single test. 
+ * Hint: Bash shells support writing binary number like: $((2#101010) + * + * # modprobe bench_page_pool_simple run_flags=$((2#100)) + */ +static unsigned long run_flags = 0xFFFFFFFF; +module_param(run_flags, ulong, 0); +MODULE_PARM_DESC(run_flags, "Limit which bench test that runs"); + +/* Count the bit number from the enum */ +enum benchmark_bit { + bit_run_bench_baseline, + bit_run_bench_no_softirq01, + bit_run_bench_no_softirq02, + bit_run_bench_no_softirq03, +}; + +#define bit(b) (1 << (b)) +#define enabled(b) ((run_flags & (bit(b)))) + +/* notice time_bench is limited to U32_MAX nr loops */ +static unsigned long loops = 10000000; +module_param(loops, ulong, 0); +MODULE_PARM_DESC(loops, "Specify loops bench will run"); + +/* Timing at the nanosec level, we need to know the overhead + * introduced by the for loop itself + */ +static int time_bench_for_loop(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + int i; + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +static int time_bench_atomic_inc(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + atomic_t cnt; + int i; + + atomic_set(&cnt, 0); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + atomic_inc(&cnt); + barrier(); /* avoid compiler to optimize this loop */ + } + loops_cnt = atomic_read(&cnt); + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* The ptr_ping in page_pool uses a spinlock. We need to know the minimum + * overhead of taking+releasing a spinlock, to know the cycles that can be saved + * by e.g. amortizing this via bulking. 
+ */ +static int time_bench_lock(struct time_bench_record *rec, void *data) +{ + uint64_t loops_cnt = 0; + spinlock_t lock; + int i; + + spin_lock_init(&lock); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + spin_lock(&lock); + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + spin_unlock(&lock); + } + time_bench_stop(rec, loops_cnt); + return loops_cnt; +} + +/* Helper for filling some page's into ptr_ring */ +static void pp_fill_ptr_ring(struct page_pool *pp, int elems) +{ + /* GFP_ATOMIC needed when under run softirq */ + gfp_t gfp_mask = GFP_ATOMIC; + struct page **array; + int i; + + array = kcalloc(elems, sizeof(struct page *), gfp_mask); + + for (i = 0; i < elems; i++) + array[i] = page_pool_alloc_pages(pp, gfp_mask); + for (i = 0; i < elems; i++) + page_pool_put_page(pp, array[i], -1, false); + + kfree(array); +} + +enum test_type { type_fast_path, type_ptr_ring, type_page_allocator }; + +/* Depends on compile optimizing this function */ +static int time_bench_page_pool(struct time_bench_record *rec, void *data, + enum test_type type, const char *func) +{ + uint64_t loops_cnt = 0; + gfp_t gfp_mask = GFP_ATOMIC; /* GFP_ATOMIC is not really needed */ + int i, err; + + struct page_pool *pp; + struct page *page; + + struct page_pool_params pp_params = { + .order = 0, + .flags = 0, + .pool_size = MY_POOL_SIZE, + .nid = NUMA_NO_NODE, + .dev = NULL, /* Only use for DMA mapping */ + .dma_dir = DMA_BIDIRECTIONAL, + }; + + pp = page_pool_create(&pp_params); + if (IS_ERR(pp)) { + err = PTR_ERR(pp); + pr_warn("%s: Error(%d) creating page_pool\n", func, err); + goto out; + } + pp_fill_ptr_ring(pp, 64); + + if (in_serving_softirq()) + pr_warn("%s(): in_serving_softirq fast-path\n", func); + else + pr_warn("%s(): Cannot use page_pool fast-path\n", func); + + time_bench_start(rec); + /** Loop to measure **/ + for (i = 0; i < rec->loops; i++) { + /* Common fast-path alloc that depend on in_serving_softirq() */ 
+ page = page_pool_alloc_pages(pp, gfp_mask); + if (!page) + break; + loops_cnt++; + barrier(); /* avoid compiler to optimize this loop */ + + /* The benchmarks purpose it to test different return paths. + * Compiler should inline optimize other function calls out + */ + if (type == type_fast_path) { + /* Fast-path recycling e.g. XDP_DROP use-case */ + page_pool_recycle_direct(pp, page); + + } else if (type == type_ptr_ring) { + /* Normal return path */ + page_pool_put_page(pp, page, -1, false); + + } else if (type == type_page_allocator) { + /* Test if not pages are recycled, but instead + * returned back into systems page allocator + */ + get_page(page); /* cause no-recycling */ + page_pool_put_page(pp, page, -1, false); + put_page(page); + } else { + BUILD_BUG(); + } + } + time_bench_stop(rec, loops_cnt); +out: + page_pool_destroy(pp); + return loops_cnt; +} + +static int time_bench_page_pool01_fast_path(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_fast_path, __func__); +} + +static int time_bench_page_pool02_ptr_ring(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_ptr_ring, __func__); +} + +static int time_bench_page_pool03_slow(struct time_bench_record *rec, + void *data) +{ + return time_bench_page_pool(rec, data, type_page_allocator, __func__); +} + +static int run_benchmark_tests(void) +{ + uint32_t nr_loops = loops; + + /* Baseline tests */ + if (enabled(bit_run_bench_baseline)) { + time_bench_loop(nr_loops * 10, 0, "for_loop", NULL, + time_bench_for_loop); + time_bench_loop(nr_loops * 10, 0, "atomic_inc", NULL, + time_bench_atomic_inc); + time_bench_loop(nr_loops, 0, "lock", NULL, time_bench_lock); + } + + /* This test cannot activate correct code path, due to no-softirq ctx */ + if (enabled(bit_run_bench_no_softirq01)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool01", NULL, + time_bench_page_pool01_fast_path); + if (enabled(bit_run_bench_no_softirq02)) + 
time_bench_loop(nr_loops, 0, "no-softirq-page_pool02", NULL, + time_bench_page_pool02_ptr_ring); + if (enabled(bit_run_bench_no_softirq03)) + time_bench_loop(nr_loops, 0, "no-softirq-page_pool03", NULL, + time_bench_page_pool03_slow); + + return 0; +} + +static int __init bench_page_pool_simple_module_init(void) +{ + if (verbose) + pr_info("Loaded\n"); + + if (loops > U32_MAX) { + pr_err("Module param loops(%lu) exceeded U32_MAX(%u)\n", loops, + U32_MAX); + return -ECHRNG; + } + + run_benchmark_tests(); + + return 0; +} +module_init(bench_page_pool_simple_module_init); + +static void __exit bench_page_pool_simple_module_exit(void) +{ + if (verbose) + pr_info("Unloaded\n"); +} +module_exit(bench_page_pool_simple_module_exit); + +MODULE_DESCRIPTION("Benchmark of page_pool simple cases"); +MODULE_AUTHOR("Jesper Dangaard Brouer <netoptimizer@brouer.com>"); +MODULE_LICENSE("GPL"); diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.c b/tools/testing/selftests/net/bench/page_pool/time_bench.c new file mode 100644 index 000000000000..073bb36ec5f2 --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.c @@ -0,0 +1,394 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/time.h> + +#include <linux/perf_event.h> /* perf_event_create_kernel_counter() */ + +/* For concurrency testing */ +#include <linux/completion.h> +#include <linux/sched.h> +#include <linux/workqueue.h> +#include <linux/kthread.h> + +#include "time_bench.h" + +static int verbose = 1; + +/** TSC (Time-Stamp Counter) based ** + * See: linux/time_bench.h + * tsc_start_clock() and tsc_stop_clock() + */ + +/** Wall-clock based ** + */ + +/** PMU (Performance Monitor Unit) based ** + */ +#define PERF_FORMAT \ + (PERF_FORMAT_GROUP | PERF_FORMAT_ID | 
PERF_FORMAT_TOTAL_TIME_ENABLED | \ + PERF_FORMAT_TOTAL_TIME_RUNNING) + +struct raw_perf_event { + uint64_t config; /* event */ + uint64_t config1; /* umask */ + struct perf_event *save; + char *desc; +}; + +/* if HT is enable a maximum of 4 events (5 if one is instructions + * retired can be specified, if HT is disabled a maximum of 8 (9 if + * one is instructions retired) can be specified. + * + * From Table 19-1. Architectural Performance Events + * Architectures Software Developer’s Manual Volume 3: System Programming + * Guide + */ +struct raw_perf_event perf_events[] = { + { 0x3c, 0x00, NULL, "Unhalted CPU Cycles" }, + { 0xc0, 0x00, NULL, "Instruction Retired" } +}; + +#define NUM_EVTS (ARRAY_SIZE(perf_events)) + +/* WARNING: PMU config is currently broken! + */ +bool time_bench_PMU_config(bool enable) +{ + int i; + struct perf_event_attr perf_conf; + struct perf_event *perf_event; + int cpu; + + preempt_disable(); + cpu = smp_processor_id(); + pr_info("DEBUG: cpu:%d\n", cpu); + preempt_enable(); + + memset(&perf_conf, 0, sizeof(struct perf_event_attr)); + perf_conf.type = PERF_TYPE_RAW; + perf_conf.size = sizeof(struct perf_event_attr); + perf_conf.read_format = PERF_FORMAT; + perf_conf.pinned = 1; + perf_conf.exclude_user = 1; /* No userspace events */ + perf_conf.exclude_kernel = 0; /* Only kernel events */ + + for (i = 0; i < NUM_EVTS; i++) { + perf_conf.disabled = enable; + //perf_conf.disabled = (i == 0) ? 
1 : 0; + perf_conf.config = perf_events[i].config; + perf_conf.config1 = perf_events[i].config1; + if (verbose) + pr_info("%s() enable PMU counter: %s\n", + __func__, perf_events[i].desc); + perf_event = perf_event_create_kernel_counter(&perf_conf, cpu, + NULL /* task */, + NULL /* overflow_handler*/, + NULL /* context */); + if (perf_event) { + perf_events[i].save = perf_event; + pr_info("%s():DEBUG perf_event success\n", __func__); + + perf_event_enable(perf_event); + } else { + pr_info("%s():DEBUG perf_event is NULL\n", __func__); + } + } + + return true; +} + +/** Generic functions ** + */ + +/* Calculate stats, store results in record */ +bool time_bench_calc_stats(struct time_bench_record *rec) +{ +#define NANOSEC_PER_SEC 1000000000 /* 10^9 */ + uint64_t ns_per_call_tmp_rem = 0; + uint32_t ns_per_call_remainder = 0; + uint64_t pmc_ipc_tmp_rem = 0; + uint32_t pmc_ipc_remainder = 0; + uint32_t pmc_ipc_div = 0; + uint32_t invoked_cnt_precision = 0; + uint32_t invoked_cnt = 0; /* 32-bit due to div_u64_rem() */ + + if (rec->flags & TIME_BENCH_LOOP) { + if (rec->invoked_cnt < 1000) { + pr_err("ERR: need more(>1000) loops(%llu) for timing\n", + rec->invoked_cnt); + return false; + } + if (rec->invoked_cnt > ((1ULL << 32) - 1)) { + /* div_u64_rem() can only support div with 32bit*/ + pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n", + rec->invoked_cnt); + return false; + } + invoked_cnt = (uint32_t)rec->invoked_cnt; + } + + /* TSC (Time-Stamp Counter) records */ + if (rec->flags & TIME_BENCH_TSC) { + rec->tsc_interval = rec->tsc_stop - rec->tsc_start; + if (rec->tsc_interval == 0) { + pr_err("ABORT: timing took ZERO TSC time\n"); + return false; + } + /* Calculate stats */ + if (rec->flags & TIME_BENCH_LOOP) + rec->tsc_cycles = rec->tsc_interval / invoked_cnt; + else + rec->tsc_cycles = rec->tsc_interval; + } + + /* Wall-clock time calc */ + if (rec->flags & TIME_BENCH_WALLCLOCK) { + rec->time_start = rec->ts_start.tv_nsec + + (NANOSEC_PER_SEC * 
rec->ts_start.tv_sec); + rec->time_stop = rec->ts_stop.tv_nsec + + (NANOSEC_PER_SEC * rec->ts_stop.tv_sec); + rec->time_interval = rec->time_stop - rec->time_start; + if (rec->time_interval == 0) { + pr_err("ABORT: timing took ZERO wallclock time\n"); + return false; + } + /* Calculate stats */ + /*** Division in kernel it tricky ***/ + /* Orig: time_sec = (time_interval / NANOSEC_PER_SEC); */ + /* remainder only correct because NANOSEC_PER_SEC is 10^9 */ + rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC, + &rec->time_sec_remainder); + //TODO: use existing struct timespec records instead of div? + + if (rec->flags & TIME_BENCH_LOOP) { + /*** Division in kernel it tricky ***/ + /* Orig: ns = ((double)time_interval / invoked_cnt); */ + /* First get quotient */ + rec->ns_per_call_quotient = + div_u64_rem(rec->time_interval, invoked_cnt, + &ns_per_call_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + ns_per_call_tmp_rem = ns_per_call_remainder; + invoked_cnt_precision = invoked_cnt / 1000; + if (invoked_cnt_precision > 0) { + rec->ns_per_call_decimal = + div_u64_rem(ns_per_call_tmp_rem, + invoked_cnt_precision, + &ns_per_call_remainder); + } + } + } + + /* Performance Monitor Unit (PMU) counters */ + if (rec->flags & TIME_BENCH_PMU) { + //FIXME: Overflow handling??? + rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start; + rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start; + + /* Calc Instruction Per Cycle (IPC) */ + /* First get quotient */ + rec->pmc_ipc_quotient = div_u64_rem(rec->pmc_inst, rec->pmc_clk, + &pmc_ipc_remainder); + /* Now get decimals .xxx precision (incorrect roundup)*/ + pmc_ipc_tmp_rem = pmc_ipc_remainder; + pmc_ipc_div = rec->pmc_clk / 1000; + if (pmc_ipc_div > 0) { + rec->pmc_ipc_decimal = div_u64_rem(pmc_ipc_tmp_rem, + pmc_ipc_div, + &pmc_ipc_remainder); + } + } + + return true; +} + +/* Generic function for invoking a loop function and calculating + * execution time stats. 
The function being called/timed is assumed + * to perform a tight loop, and update the timing record struct. + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *record, void *data)) +{ + struct time_bench_record rec; + + /* Setup record */ + memset(&rec, 0, sizeof(rec)); /* zero func might not update all */ + rec.version_abi = 1; + rec.loops = loops; + rec.step = step; + rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK); + + /*** Loop function being timed ***/ + if (!func(&rec, data)) { + pr_err("ABORT: function being timed failed\n"); + return false; + } + + if (rec.invoked_cnt < loops) + pr_warn("WARNING: Invoke count(%llu) smaller than loops(%d)\n", + rec.invoked_cnt, loops); + + /* Calculate stats */ + time_bench_calc_stats(&rec); + + pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + txt, rec.tsc_cycles, rec.ns_per_call_quotient, + rec.ns_per_call_decimal, rec.step, rec.time_sec, + rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt, + rec.tsc_interval); + if (rec.flags & TIME_BENCH_PMU) + pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n", + txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient, + rec.pmc_ipc_decimal); + return true; +} + +/* Function getting invoked by kthread */ +static int invoke_test_on_cpu_func(void *private) +{ + struct time_bench_cpu *cpu = private; + struct time_bench_sync *sync = cpu->sync; + cpumask_t newmask = CPU_MASK_NONE; + void *data = cpu->data; + + /* Restrict CPU */ + cpumask_set_cpu(cpu->rec.cpu, &newmask); + set_cpus_allowed_ptr(current, &newmask); + + /* Synchronize start of concurrency test */ + atomic_inc(&sync->nr_tests_running); + wait_for_completion(&sync->start_event); + + /* Start benchmark function */ + if (!cpu->bench_func(&cpu->rec, data)) { + pr_err("ERROR: function being 
timed failed on CPU:%d(%d)\n", + cpu->rec.cpu, smp_processor_id()); + } else { + if (verbose) + pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu, + smp_processor_id()); + } + cpu->did_bench_run = true; + + /* End test */ + atomic_dec(&sync->nr_tests_running); + /* Wait for kthread_stop() telling us to stop */ + while (!kthread_should_stop()) { + set_current_state(TASK_INTERRUPTIBLE); + schedule(); + } + __set_current_state(TASK_RUNNING); + return 0; +} + +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask) +{ + uint64_t average = 0; + int cpu; + int step = 0; + struct sum { + uint64_t tsc_cycles; + int records; + } sum = { 0 }; + + /* Get stats */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + struct time_bench_record *rec = &c->rec; + + /* Calculate stats */ + time_bench_calc_stats(rec); + + pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n", + desc, cpu, rec->tsc_cycles, rec->ns_per_call_quotient, + rec->ns_per_call_decimal, rec->step, rec->time_sec, + rec->time_sec_remainder, rec->time_interval, + rec->invoked_cnt, rec->tsc_interval); + + /* Collect average */ + sum.records++; + sum.tsc_cycles += rec->tsc_cycles; + step = rec->step; + } + + if (sum.records) /* avoid div-by-zero */ + average = sum.tsc_cycles / sum.records; + pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc, + average, sum.records, step); +} + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, + struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)) +{ + int cpu, running = 0; + + if (verbose) // DEBUG + pr_warn("%s() Started on CPU:%d\n", __func__, + smp_processor_id()); + + /* Reset sync conditions */ + 
atomic_set(&sync->nr_tests_running, 0); + init_completion(&sync->start_event); + + /* Spawn off jobs on all CPUs */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + running++; + c->sync = sync; /* Send sync variable along */ + c->data = data; /* Send opaque along */ + + /* Init benchmark record */ + memset(&c->rec, 0, sizeof(struct time_bench_record)); + c->rec.version_abi = 1; + c->rec.loops = loops; + c->rec.step = step; + c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | + TIME_BENCH_WALLCLOCK); + c->rec.cpu = cpu; + c->bench_func = func; + c->task = kthread_run(invoke_test_on_cpu_func, c, + "time_bench%d", cpu); + if (IS_ERR(c->task)) { + pr_err("%s(): Failed to start test func\n", __func__); + return; /* Argh, what about cleanup?! */ + } + } + + /* Wait until all processes are running */ + while (atomic_read(&sync->nr_tests_running) < running) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + /* Kick off all CPU concurrently on completion event */ + complete_all(&sync->start_event); + + /* Wait for CPUs to finish */ + while (atomic_read(&sync->nr_tests_running)) { + set_current_state(TASK_UNINTERRUPTIBLE); + schedule_timeout(10); + } + + /* Stop the kthreads */ + for_each_cpu(cpu, mask) { + struct time_bench_cpu *c = &cpu_tasks[cpu]; + + kthread_stop(c->task); + } + + if (verbose) // DEBUG - happens often, finish on another CPU + pr_warn("%s() Finished on CPU:%d\n", __func__, + smp_processor_id()); +} diff --git a/tools/testing/selftests/net/bench/page_pool/time_bench.h b/tools/testing/selftests/net/bench/page_pool/time_bench.h new file mode 100644 index 000000000000..e113fcf341dc --- /dev/null +++ b/tools/testing/selftests/net/bench/page_pool/time_bench.h @@ -0,0 +1,238 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Benchmarking code execution time inside the kernel + * + * Copyright (C) 2014, Red Hat, Inc., Jesper Dangaard Brouer + * for licensing details see kernel-base/COPYING + */ +#ifndef 
_LINUX_TIME_BENCH_H +#define _LINUX_TIME_BENCH_H + +/* Main structure used for recording a benchmark run */ +struct time_bench_record { + uint32_t version_abi; + uint32_t loops; /* Requested loop invocations */ + uint32_t step; /* option for e.g. bulk invocations */ + + uint32_t flags; /* Measurements types enabled */ +#define TIME_BENCH_LOOP BIT(0) +#define TIME_BENCH_TSC BIT(1) +#define TIME_BENCH_WALLCLOCK BIT(2) +#define TIME_BENCH_PMU BIT(3) + + uint32_t cpu; /* Used when embedded in time_bench_cpu */ + + /* Records */ + uint64_t invoked_cnt; /* Returned actual invocations */ + uint64_t tsc_start; + uint64_t tsc_stop; + struct timespec64 ts_start; + struct timespec64 ts_stop; + /* PMU counters for instruction and cycles + * instructions counter including pipelined instructions + */ + uint64_t pmc_inst_start; + uint64_t pmc_inst_stop; + /* CPU unhalted clock counter */ + uint64_t pmc_clk_start; + uint64_t pmc_clk_stop; + + /* Result records */ + uint64_t tsc_interval; + uint64_t time_start, time_stop, time_interval; /* in nanosec */ + uint64_t pmc_inst, pmc_clk; + + /* Derived result records */ + uint64_t tsc_cycles; // +decimal? + uint64_t ns_per_call_quotient, ns_per_call_decimal; + uint64_t time_sec; + uint32_t time_sec_remainder; + uint64_t pmc_ipc_quotient, pmc_ipc_decimal; /* inst per cycle */ +}; + +/* For synchronizing parallel CPUs to run concurrently */ +struct time_bench_sync { + atomic_t nr_tests_running; + struct completion start_event; +}; + +/* Keep track of CPUs executing our bench function. 
+ * + * Embed a time_bench_record for storing info per cpu + */ +struct time_bench_cpu { + struct time_bench_record rec; + struct time_bench_sync *sync; /* back ptr */ + struct task_struct *task; + /* "data" opaque could have been placed in time_bench_sync, + * but to avoid any false sharing, place it per CPU + */ + void *data; + /* Support masking outsome CPUs, mark if it ran */ + bool did_bench_run; + /* int cpu; // note CPU stored in time_bench_record */ + int (*bench_func)(struct time_bench_record *record, void *data); +}; + +/* + * Below TSC assembler code is not compatible with other archs, and + * can also fail on guests if cpu-flags are not correct. + * + * The way TSC reading is used, many iterations, does not require as + * high accuracy as described below (in Intel Doc #324264). + * + * Considering changing to use get_cycles() (#include <asm/timex.h>). + */ + +/** TSC (Time-Stamp Counter) based ** + * Recommend reading, to understand details of reading TSC accurately: + * Intel Doc #324264, "How to Benchmark Code Execution Times on Intel" + * + * Consider getting exclusive ownership of CPU by using: + * unsigned long flags; + * preempt_disable(); + * raw_local_irq_save(flags); + * _your_code_ + * raw_local_irq_restore(flags); + * preempt_enable(); + * + * Clobbered registers: "%rax", "%rbx", "%rcx", "%rdx" + * RDTSC only change "%rax" and "%rdx" but + * CPUID clears the high 32-bits of all (rax/rbx/rcx/rdx) + */ +static __always_inline uint64_t tsc_start_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("CPUID\n\t" + "RDTSC\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + //FIXME: on 32bit use clobbered %eax + %edx + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +static __always_inline uint64_t tsc_stop_clock(void) +{ + /* See: Intel Doc #324264 */ + unsigned int hi, lo; + + asm volatile("RDTSCP\n\t" + "mov %%edx, %0\n\t" + "mov %%eax, %1\n\t" + 
"CPUID\n\t" + : "=r"(hi), "=r"(lo)::"%rax", "%rbx", "%rcx", "%rdx"); + return ((uint64_t)lo) | (((uint64_t)hi) << 32); +} + +/** Wall-clock based ** + * + * use: getnstimeofday() + * getnstimeofday(&rec->ts_start); + * getnstimeofday(&rec->ts_stop); + * + * API changed see: Documentation/core-api/timekeeping.rst + * https://www.kernel.org/doc/html/latest/core-api/timekeeping.html#c.getnstimeofday + * + * We should instead use: ktime_get_real_ts64() is a direct + * replacement, but consider using monotonic time (ktime_get_ts64()) + * and/or a ktime_t based interface (ktime_get()/ktime_get_real()). + */ + +/** PMU (Performance Monitor Unit) based ** + * + * Needed for calculating: Instructions Per Cycle (IPC) + * - The IPC number tell how efficient the CPU pipelining were + */ +//lookup: perf_event_create_kernel_counter() + +bool time_bench_PMU_config(bool enable); + +/* Raw reading via rdpmc() using fixed counters + * + * From: https://github.com/andikleen/simple-pmu + */ +enum { + FIXED_SELECT = (1U << 30), /* == 0x40000000 */ + FIXED_INST_RETIRED_ANY = 0, + FIXED_CPU_CLK_UNHALTED_CORE = 1, + FIXED_CPU_CLK_UNHALTED_REF = 2, +}; + +static __always_inline unsigned int long long p_rdpmc(unsigned int in) +{ + unsigned int d, a; + + asm volatile("rdpmc" : "=d"(d), "=a"(a) : "c"(in) : "memory"); + return ((unsigned long long)d << 32) | a; +} + +/* These PMU counter needs to be enabled, but I don't have the + * configure code implemented. 
My current hack is running: + * sudo perf stat -e cycles:k -e instructions:k insmod lib/ring_queue_test.ko + */ +/* Reading all pipelined instruction */ +static __always_inline unsigned long long pmc_inst(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_INST_RETIRED_ANY); +} + +/* Reading CPU clock cycles */ +static __always_inline unsigned long long pmc_clk(void) +{ + return p_rdpmc(FIXED_SELECT | FIXED_CPU_CLK_UNHALTED_CORE); +} + +/* Raw reading via MSR rdmsr() is likely wrong + * FIXME: How can I know which raw MSR registers are conf for what? + */ +#define MSR_IA32_PCM0 0x400000C1 /* PERFCTR0 */ +#define MSR_IA32_PCM1 0x400000C2 /* PERFCTR1 */ +#define MSR_IA32_PCM2 0x400000C3 +static inline uint64_t msr_inst(unsigned long long *msr_result) +{ + return rdmsrq_safe(MSR_IA32_PCM0, msr_result); +} + +/** Generic functions ** + */ +bool time_bench_loop(uint32_t loops, int step, char *txt, void *data, + int (*func)(struct time_bench_record *rec, void *data)); +bool time_bench_calc_stats(struct time_bench_record *rec); + +void time_bench_run_concurrent(uint32_t loops, int step, void *data, + const struct cpumask *mask, /* Support masking outsome CPUs*/ + struct time_bench_sync *sync, struct time_bench_cpu *cpu_tasks, + int (*func)(struct time_bench_record *record, void *data)); +void time_bench_print_stats_cpumask(const char *desc, + struct time_bench_cpu *cpu_tasks, + const struct cpumask *mask); + +//FIXME: use rec->flags to select measurement, should be MACRO +static __always_inline void time_bench_start(struct time_bench_record *rec) +{ + //getnstimeofday(&rec->ts_start); + ktime_get_real_ts64(&rec->ts_start); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_start = pmc_inst(); + rec->pmc_clk_start = pmc_clk(); + } + rec->tsc_start = tsc_start_clock(); +} + +static __always_inline void time_bench_stop(struct time_bench_record *rec, + uint64_t invoked_cnt) +{ + rec->tsc_stop = tsc_stop_clock(); + if (rec->flags & TIME_BENCH_PMU) { + rec->pmc_inst_stop = 
pmc_inst(); + rec->pmc_clk_stop = pmc_clk(); + } + //getnstimeofday(&rec->ts_stop); + ktime_get_real_ts64(&rec->ts_stop); + rec->invoked_cnt = invoked_cnt; +} + +#endif /* _LINUX_TIME_BENCH_H */ diff --git a/tools/testing/selftests/net/bench/test_bench_page_pool.sh b/tools/testing/selftests/net/bench/test_bench_page_pool.sh new file mode 100755 index 000000000000..7b8b18cfedce --- /dev/null +++ b/tools/testing/selftests/net/bench/test_bench_page_pool.sh @@ -0,0 +1,32 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# + +set -e + +DRIVER="./page_pool/bench_page_pool.ko" +result="" + +function run_test() +{ + rmmod "bench_page_pool.ko" || true + insmod $DRIVER > /dev/null 2>&1 + result=$(dmesg | tail -10) + echo "$result" + + echo + echo "Fast path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool01 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "ptr_ring results:" + echo "${result}" | grep -o -E "no-softirq-page_pool02 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" + + echo + echo "slow path results:" + echo "${result}" | grep -o -E "no-softirq-page_pool03 Per elem: ([0-9]+) cycles\(tsc\) ([0-9]+\.[0-9]+) ns" +} + +run_test + +exit 0 diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c index 57ff67a3751e..da04b0b19b73 100644 --- a/tools/testing/selftests/net/bind_bhash.c +++ b/tools/testing/selftests/net/bind_bhash.c @@ -75,7 +75,7 @@ static void *setup(void *arg) int *array = (int *)arg; for (i = 0; i < MAX_CONNECTIONS; i++) { - sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + sock_fd = bind_socket(SO_REUSEPORT, setup_addr); if (sock_fd < 0) { ret = sock_fd; pthread_exit(&ret); @@ -103,7 +103,7 @@ int main(int argc, const char *argv[]) setup_addr = use_v6 ? 
setup_addr_v6 : setup_addr_v4; - listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr); + listener_fd = bind_socket(SO_REUSEPORT, setup_addr); if (listen(listener_fd, 100) < 0) { perror("listen failed"); return -1; diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c index cb9fdf51ea59..40126f9b901e 100644 --- a/tools/testing/selftests/net/bind_timewait.c +++ b/tools/testing/selftests/net/bind_timewait.c @@ -4,7 +4,7 @@ #include <sys/socket.h> #include <netinet/in.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(bind_timewait) { diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c index b7b54d646b93..7d11548b2c61 100644 --- a/tools/testing/selftests/net/bind_wildcard.c +++ b/tools/testing/selftests/net/bind_wildcard.c @@ -4,7 +4,7 @@ #include <sys/socket.h> #include <netinet/in.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" static const __u32 in4addr_any = INADDR_ANY; static const __u32 in4addr_loopback = INADDR_LOOPBACK; diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index 3efe44f6e92a..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): @@ -207,20 +207,24 @@ def bpftool_prog_list_wait(expected=0, n_retry=20): raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) def 
bpftool_map_list_wait(expected=0, n_retry=20, ns=""): + nmaps = None for i in range(n_retry): maps = bpftool_map_list(ns=ns) - if len(maps) == expected: + nmaps = len(maps) + if nmaps == expected: return maps time.sleep(0.05) raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps)) def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None, - fail=True, include_stderr=False): + fail=True, include_stderr=False, dev_bind=None): args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name) if prog_type is not None: args += " type " + prog_type if dev is not None: args += " dev " + dev + elif dev_bind is not None: + args += " xdpmeta_dev " + dev_bind if len(maps): args += " map " + " map ".join(maps) @@ -594,8 +598,9 @@ def check_extack_nsim(output, reference, args): check_extack(output, "netdevsim: " + reference, args) def check_no_extack(res, needle): - fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"), - "Found '%s' in command output, leaky extack?" % (needle)) + haystack = (res[1] + res[2]).strip() + fail(haystack.count(needle) or haystack.count("Warning:"), + "Unexpected command output, leaky extack? 
('%s', '%s')" % (needle, haystack)) def check_verifier_log(output, reference): lines = output.split("\n") @@ -707,6 +712,7 @@ _, base_maps = bpftool("map") base_map_names = [ 'pid_iter.rodata', # created on each bpftool invocation 'libbpf_det_bind', # created on each bpftool invocation + 'libbpf_global', ] # Check netdevsim @@ -979,6 +985,16 @@ try: rm("/sys/fs/bpf/offload") sim.wait_for_flush() + bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/devbound", + dev_bind=sim['ifname']) + devbound = bpf_pinned("/sys/fs/bpf/devbound") + start_test("Test dev-bound program in generic mode...") + ret, _, err = sim.set_xdp(devbound, "generic", fail=False, include_stderr=True) + fail(ret == 0, "devbound program in generic mode allowed") + check_extack(err, "Can't attach device-bound programs in generic mode.", args) + rm("/sys/fs/bpf/devbound") + sim.wait_for_flush() + start_test("Test XDP load failure...") sim.dfs["dev/bpf_bind_verifier_accept"] = 0 ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload", diff --git a/tools/testing/selftests/net/broadcast_ether_dst.sh b/tools/testing/selftests/net/broadcast_ether_dst.sh new file mode 100755 index 000000000000..334a7eca8a80 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_ether_dst.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Author: Brett A C Sheffield <bacs@librecast.net> +# Author: Oscar Maes <oscmaes92@gmail.com> +# +# Ensure destination ethernet field is correctly set for +# broadcast packets + +source lib.sh + +CLIENT_IP4="192.168.0.1" +GW_IP4="192.168.0.2" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth \ + peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}"/24 dev link0 + + ip -net "${SERVER_NS}" link set link1 up + + ip -net "${CLIENT_NS}" route add default via "${GW_IP4}" + ip netns exec "${CLIENT_NS}" arp -s "${GW_IP4}" 
00:11:22:33:44:55 +} + +cleanup() { + rm -f "${CAPFILE}" "${OUTPUT}" + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +test_broadcast_ether_dst() { + local rc=0 + CAPFILE=$(mktemp -u cap.XXXXXXXXXX) + OUTPUT=$(mktemp -u out.XXXXXXXXXX) + + echo "Testing ethernet broadcast destination" + + # start tcpdump listening for icmp + # tcpdump will exit after receiving a single packet + # timeout will kill tcpdump if it is still running after 2s + timeout 2s ip netns exec "${CLIENT_NS}" \ + tcpdump -i link0 -c 1 -w "${CAPFILE}" icmp &> "${OUTPUT}" & + pid=$! + slowwait 1 grep -qs "listening" "${OUTPUT}" + + # send broadcast ping + ip netns exec "${CLIENT_NS}" \ + ping -W0.01 -c1 -b 255.255.255.255 &> /dev/null + + # wait for tcpdump for exit after receiving packet + wait "${pid}" + + # compare ethernet destination field to ff:ff:ff:ff:ff:ff + ether_dst=$(tcpdump -r "${CAPFILE}" -tnne 2>/dev/null | \ + awk '{sub(/,/,"",$3); print $3}') + if [[ "${ether_dst}" == "ff:ff:ff:ff:ff:ff" ]]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected dst ether addr to be ff:ff:ff:ff:ff:ff," \ + "got ${ether_dst}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v tcpdump)" ]; then + echo "SKIP: Could not run test without tcpdump tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup +test_broadcast_ether_dst + +exit $? 
diff --git a/tools/testing/selftests/net/broadcast_pmtu.sh b/tools/testing/selftests/net/broadcast_pmtu.sh new file mode 100755 index 000000000000..726eb5d25839 --- /dev/null +++ b/tools/testing/selftests/net/broadcast_pmtu.sh @@ -0,0 +1,47 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Ensures broadcast route MTU is respected + +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +CLIENT_IP4="192.168.0.1/24" +CLIENT_BROADCAST_ADDRESS="192.168.0.255" + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +SERVER_IP4="192.168.0.2/24" + +setup() { + ip netns add "${CLIENT_NS}" + ip netns add "${SERVER_NS}" + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" link set link0 mtu 9000 + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" link set link1 mtu 1500 + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}" dev link1 + + read -r -a CLIENT_BROADCAST_ENTRY <<< "$(ip -net "${CLIENT_NS}" route show table local type broadcast)" + ip -net "${CLIENT_NS}" route del "${CLIENT_BROADCAST_ENTRY[@]}" + ip -net "${CLIENT_NS}" route add "${CLIENT_BROADCAST_ENTRY[@]}" mtu 1500 + + ip net exec "${SERVER_NS}" sysctl -wq net.ipv4.icmp_echo_ignore_broadcasts=0 +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + ip netns del "${CLIENT_NS}" + ip netns del "${SERVER_NS}" +} + +trap cleanup EXIT + +setup && + echo "Testing for broadcast route MTU" && + ip net exec "${CLIENT_NS}" ping -f -M want -q -c 1 -s 8000 -w 1 -b "${CLIENT_BROADCAST_ADDRESS}" > /dev/null 2>&1 + +exit $? 
+ diff --git a/tools/testing/selftests/net/busy_poll_test.sh b/tools/testing/selftests/net/busy_poll_test.sh new file mode 100755 index 000000000000..5ec1c85c1623 --- /dev/null +++ b/tools/testing/selftests/net/busy_poll_test.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +source lib.sh + +NSIM_SV_ID=$((256 + RANDOM % 256)) +NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID +NSIM_CL_ID=$((512 + RANDOM % 256)) +NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID + +NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device +NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device +NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device +NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device + +SERVER_IP=192.168.1.1 +CLIENT_IP=192.168.1.2 +SERVER_PORT=48675 + +# busy poll config +MAX_EVENTS=8 +BUSY_POLL_USECS=0 +BUSY_POLL_BUDGET=16 +PREFER_BUSY_POLL=1 + +# IRQ deferral config +NAPI_DEFER_HARD_IRQS=100 +GRO_FLUSH_TIMEOUT=50000 +SUSPEND_TIMEOUT=20000000 + +NAPI_THREADED_MODE_BUSY_POLL=2 + +setup_ns() +{ + set -e + ip netns add nssv + ip netns add nscl + + NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \ + -path $NSIM_SV_SYS/net -exec basename {} \;) + NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! 
\ + -path $NSIM_CL_SYS/net -exec basename {} \;) + + # ensure the server has 1 queue + ethtool -L $NSIM_SV_NAME combined 1 2>/dev/null + + ip link set $NSIM_SV_NAME netns nssv + ip link set $NSIM_CL_NAME netns nscl + + ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME + ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME + + ip netns exec nssv ip link set dev $NSIM_SV_NAME up + ip netns exec nscl ip link set dev $NSIM_CL_NAME up + + set +e +} + +cleanup_ns() +{ + ip netns del nscl + ip netns del nssv +} + +test_busypoll() +{ + suspend_value=${1:-0} + napi_threaded_value=${2:-0} + prefer_busy_poll_value=${3:-$PREFER_BUSY_POLL} + + tmp_file=$(mktemp) + out_file=$(mktemp) + + # fill a test file with random data + dd if=/dev/urandom of=${tmp_file} bs=1M count=1 2> /dev/null + + timeout -k 1s 30s ip netns exec nssv ./busy_poller \ + -p${SERVER_PORT} \ + -b${SERVER_IP} \ + -m${MAX_EVENTS} \ + -u${BUSY_POLL_USECS} \ + -P${prefer_busy_poll_value} \ + -g${BUSY_POLL_BUDGET} \ + -i${NSIM_SV_IFIDX} \ + -s${suspend_value} \ + -t${napi_threaded_value} \ + -o${out_file}& + + wait_local_port_listen nssv ${SERVER_PORT} tcp + + ip netns exec nscl socat -u $tmp_file TCP:${SERVER_IP}:${SERVER_PORT} + + wait + + tmp_file_md5sum=$(md5sum $tmp_file | cut -f1 -d' ') + out_file_md5sum=$(md5sum $out_file | cut -f1 -d' ') + + if [ "$tmp_file_md5sum" = "$out_file_md5sum" ]; then + res=0 + else + echo "md5sum mismatch" + echo "input file md5sum: ${tmp_file_md5sum}"; + echo "output file md5sum: ${out_file_md5sum}"; + res=1 + fi + + rm $out_file $tmp_file + + return $res +} + +test_busypoll_with_suspend() +{ + test_busypoll ${SUSPEND_TIMEOUT} + + return $? +} + +test_busypoll_with_napi_threaded() +{ + # Only enable napi threaded poll. Set suspend timeout and prefer busy + # poll to 0. + test_busypoll 0 ${NAPI_THREADED_MODE_BUSY_POLL} 0 + + return $? 
+} + +### +### Code start +### + +modprobe netdevsim + +# linking + +echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW +echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW +udevadm settle + +setup_ns + +NSIM_SV_FD=$((256 + RANDOM % 256)) +exec {NSIM_SV_FD}</var/run/netns/nssv +NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex) + +NSIM_CL_FD=$((256 + RANDOM % 256)) +exec {NSIM_CL_FD}</var/run/netns/nscl +NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex) + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \ + $NSIM_DEV_SYS_LINK + +if [ $? -ne 0 ]; then + echo "linking netdevsim1 with netdevsim2 should succeed" + cleanup_ns + exit 1 +fi + +test_busypoll +if [ $? -ne 0 ]; then + echo "test_busypoll failed" + cleanup_ns + exit 1 +fi + +test_busypoll_with_suspend +if [ $? -ne 0 ]; then + echo "test_busypoll_with_suspend failed" + cleanup_ns + exit 1 +fi + +test_busypoll_with_napi_threaded +if [ $? -ne 0 ]; then + echo "test_busypoll_with_napi_threaded failed" + cleanup_ns + exit 1 +fi + +echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK + +echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL + +cleanup_ns + +modprobe -r netdevsim + +exit 0 diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c new file mode 100644 index 000000000000..3a81f9c94795 --- /dev/null +++ b/tools/testing/selftests/net/busy_poller.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <assert.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <inttypes.h> +#include <limits.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <ynl.h> + +#include <arpa/inet.h> +#include <netinet/in.h> + +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include <linux/genetlink.h> +#include <linux/netlink.h> + +#include "netdev-user.h" + +/* The below ifdef blob is required because: + * + * - sys/epoll.h does not 
(yet) have the ioctl definitions included. So, + * systems with older glibcs will not have them available. However, + * sys/epoll.h does include the type definition for epoll_data, which is + * needed by the user program (e.g. epoll_event.data.fd) + * + * - linux/eventpoll.h does not define the epoll_data type, it is simply an + * opaque __u64. It does, however, include the ioctl definition. + * + * Including both headers is impossible (types would be redefined), so I've + * opted instead to take sys/epoll.h, and include the blob below. + * + * Someday, when glibc is globally up to date, the blob below can be removed. + */ +#if !defined(EPOLL_IOC_TYPE) +struct epoll_params { + uint32_t busy_poll_usecs; + uint16_t busy_poll_budget; + uint8_t prefer_busy_poll; + + /* pad the struct to a multiple of 64bits */ + uint8_t __pad; +}; + +#define EPOLL_IOC_TYPE 0x8A +#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params) +#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params) +#endif + +static uint16_t cfg_port = 8000; +static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY }; +static char *cfg_outfile; +static int cfg_max_events = 8; +static uint32_t cfg_ifindex; + +/* busy poll params */ +static uint32_t cfg_busy_poll_usecs; +static uint16_t cfg_busy_poll_budget; +static uint8_t cfg_prefer_busy_poll; + +/* NAPI params */ +static uint32_t cfg_defer_hard_irqs; +static uint64_t cfg_gro_flush_timeout; +static uint64_t cfg_irq_suspend_timeout; +static enum netdev_napi_threaded cfg_napi_threaded_poll = NETDEV_NAPI_THREADED_DISABLED; + +static void usage(const char *filepath) +{ + error(1, 0, + "Usage: %s -p<port> -b<addr> -m<max_events> -u<busy_poll_usecs> -P<prefer_busy_poll> -g<busy_poll_budget> -o<outfile> -d<defer_hard_irqs> -r<gro_flush_timeout> -s<irq_suspend_timeout> -t<napi_threaded_poll> -i<ifindex>", + filepath); +} + +static void parse_opts(int argc, char **argv) +{ + unsigned long long tmp; + int ret; + int c; + + if (argc <= 1) + 
usage(argv[0]); + + while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:t:")) != -1) { + /* most options take integer values, except o and b, so reduce + * code duplication a bit for the common case by calling + * strtoull here and leave bounds checking and casting per + * option below. + */ + if (c != 'o' && c != 'b') + tmp = strtoull(optarg, NULL, 0); + + switch (c) { + case 'u': + if (tmp == ULLONG_MAX || tmp > UINT32_MAX) + error(1, ERANGE, "busy_poll_usecs too large"); + + cfg_busy_poll_usecs = (uint32_t)tmp; + break; + case 'P': + if (tmp == ULLONG_MAX || tmp > 1) + error(1, ERANGE, + "prefer busy poll should be 0 or 1"); + + cfg_prefer_busy_poll = (uint8_t)tmp; + break; + case 'g': + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) + error(1, ERANGE, + "busy poll budget must be [0, UINT16_MAX]"); + + cfg_busy_poll_budget = (uint16_t)tmp; + break; + case 'p': + if (tmp == ULLONG_MAX || tmp > UINT16_MAX) + error(1, ERANGE, "port must be <= 65535"); + + cfg_port = (uint16_t)tmp; + break; + case 'b': + ret = inet_aton(optarg, &cfg_bind_addr); + if (ret == 0) + error(1, errno, + "bind address %s invalid", optarg); + break; + case 'o': + cfg_outfile = strdup(optarg); + if (!cfg_outfile) + error(1, 0, "outfile invalid"); + break; + case 'm': + if (tmp == ULLONG_MAX || tmp > INT_MAX) + error(1, ERANGE, + "max events must be > 0 and <= INT_MAX"); + + cfg_max_events = (int)tmp; + break; + case 'd': + if (tmp == ULLONG_MAX || tmp > INT32_MAX) + error(1, ERANGE, + "defer_hard_irqs must be <= INT32_MAX"); + + cfg_defer_hard_irqs = (uint32_t)tmp; + break; + case 'r': + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) + error(1, ERANGE, + "gro_flush_timeout must be < UINT64_MAX"); + + cfg_gro_flush_timeout = (uint64_t)tmp; + break; + case 's': + if (tmp == ULLONG_MAX || tmp > UINT64_MAX) + error(1, ERANGE, + "irq_suspend_timeout must be < ULLONG_MAX"); + + cfg_irq_suspend_timeout = (uint64_t)tmp; + break; + case 'i': + if (tmp == ULLONG_MAX || tmp > INT_MAX) + error(1, ERANGE, + 
"ifindex must be <= INT_MAX"); + + cfg_ifindex = (int)tmp; + break; + case 't': + if (tmp > 2) + error(1, ERANGE, "napi threaded poll value must be 0-2"); + + cfg_napi_threaded_poll = (enum netdev_napi_threaded)tmp; + break; + } + } + + if (!cfg_ifindex) + usage(argv[0]); + + if (optind != argc) + usage(argv[0]); +} + +static void epoll_ctl_add(int epfd, int fd, uint32_t events) +{ + struct epoll_event ev; + + ev.events = events; + ev.data.fd = fd; + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) == -1) + error(1, errno, "epoll_ctl add fd: %d", fd); +} + +static void setnonblock(int sockfd) +{ + int flags; + + flags = fcntl(sockfd, F_GETFL, 0); + + if (fcntl(sockfd, F_SETFL, flags | O_NONBLOCK) == -1) + error(1, errno, "unable to set socket to nonblocking mode"); +} + +static void write_chunk(int fd, char *buf, ssize_t buflen) +{ + ssize_t remaining = buflen; + char *buf_offset = buf; + ssize_t writelen = 0; + ssize_t write_result; + + while (writelen < buflen) { + write_result = write(fd, buf_offset, remaining); + if (write_result == -1) + error(1, errno, "unable to write data to outfile"); + + writelen += write_result; + remaining -= write_result; + buf_offset += write_result; + } +} + +static void setup_queue(void) +{ + struct netdev_napi_get_list *napi_list = NULL; + struct netdev_napi_get_req_dump *req = NULL; + struct netdev_napi_set_req *set_req = NULL; + struct ynl_sock *ys; + struct ynl_error yerr; + uint32_t napi_id = 0; + + ys = ynl_sock_create(&ynl_netdev_family, &yerr); + if (!ys) + error(1, 0, "YNL: %s", yerr.msg); + + req = netdev_napi_get_req_dump_alloc(); + netdev_napi_get_req_dump_set_ifindex(req, cfg_ifindex); + napi_list = netdev_napi_get_dump(ys, req); + + /* assume there is 1 NAPI configured and take the first */ + if (napi_list->obj._present.id) + napi_id = napi_list->obj.id; + else + error(1, 0, "napi ID not present?"); + + set_req = netdev_napi_set_req_alloc(); + netdev_napi_set_req_set_id(set_req, napi_id); + 
netdev_napi_set_req_set_defer_hard_irqs(set_req, cfg_defer_hard_irqs); + netdev_napi_set_req_set_gro_flush_timeout(set_req, + cfg_gro_flush_timeout); + netdev_napi_set_req_set_irq_suspend_timeout(set_req, + cfg_irq_suspend_timeout); + + if (cfg_napi_threaded_poll) + netdev_napi_set_req_set_threaded(set_req, cfg_napi_threaded_poll); + + if (netdev_napi_set(ys, set_req)) + error(1, 0, "can't set NAPI params: %s\n", yerr.msg); + + netdev_napi_get_list_free(napi_list); + netdev_napi_get_req_dump_free(req); + netdev_napi_set_req_free(set_req); + ynl_sock_destroy(ys); +} + +static void run_poller(void) +{ + struct epoll_event events[cfg_max_events]; + struct epoll_params epoll_params = {0}; + struct sockaddr_in server_addr; + int i, epfd, nfds; + ssize_t readlen; + int outfile_fd; + char buf[1024]; + int sockfd; + int conn; + int val; + + outfile_fd = open(cfg_outfile, O_WRONLY | O_CREAT, 0644); + if (outfile_fd == -1) + error(1, errno, "unable to open outfile: %s", cfg_outfile); + + sockfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sockfd == -1) + error(1, errno, "unable to create listen socket"); + + server_addr.sin_family = AF_INET; + server_addr.sin_port = htons(cfg_port); + server_addr.sin_addr = cfg_bind_addr; + + /* these values are range checked during parse_opts, so casting is safe + * here + */ + epoll_params.busy_poll_usecs = cfg_busy_poll_usecs; + epoll_params.busy_poll_budget = cfg_busy_poll_budget; + epoll_params.prefer_busy_poll = cfg_prefer_busy_poll; + epoll_params.__pad = 0; + + val = 1; + if (setsockopt(sockfd, SOL_SOCKET, SO_REUSEADDR, &val, sizeof(val))) + error(1, errno, "poller setsockopt reuseaddr"); + + setnonblock(sockfd); + + if (bind(sockfd, (struct sockaddr *)&server_addr, + sizeof(struct sockaddr_in))) + error(0, errno, "poller bind to port: %d\n", cfg_port); + + if (listen(sockfd, 1)) + error(1, errno, "poller listen"); + + epfd = epoll_create1(0); + if (ioctl(epfd, EPIOCSPARAMS, &epoll_params) == -1) + error(1, errno, "unable to 
set busy poll params"); + + epoll_ctl_add(epfd, sockfd, EPOLLIN | EPOLLOUT | EPOLLET); + + for (;;) { + nfds = epoll_wait(epfd, events, cfg_max_events, -1); + for (i = 0; i < nfds; i++) { + if (events[i].data.fd == sockfd) { + conn = accept(sockfd, NULL, NULL); + if (conn == -1) + error(1, errno, + "accepting incoming connection failed"); + + setnonblock(conn); + epoll_ctl_add(epfd, conn, + EPOLLIN | EPOLLET | EPOLLRDHUP | + EPOLLHUP); + } else if (events[i].events & EPOLLIN) { + for (;;) { + readlen = read(events[i].data.fd, buf, + sizeof(buf)); + if (readlen > 0) + write_chunk(outfile_fd, buf, + readlen); + else + break; + } + } else { + /* spurious event ? */ + } + if (events[i].events & (EPOLLRDHUP | EPOLLHUP)) { + epoll_ctl(epfd, EPOLL_CTL_DEL, + events[i].data.fd, NULL); + close(events[i].data.fd); + close(outfile_fd); + return; + } + } + } +} + +int main(int argc, char *argv[]) +{ + parse_opts(argc, argv); + setup_queue(); + run_poller(); + + if (cfg_outfile) + free(cfg_outfile); + + return 0; +} diff --git a/tools/testing/selftests/net/can/.gitignore b/tools/testing/selftests/net/can/.gitignore new file mode 100644 index 000000000000..764a53fc837f --- /dev/null +++ b/tools/testing/selftests/net/can/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +test_raw_filter diff --git a/tools/testing/selftests/net/can/Makefile b/tools/testing/selftests/net/can/Makefile new file mode 100644 index 000000000000..5b82e60a03e7 --- /dev/null +++ b/tools/testing/selftests/net/can/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 + +top_srcdir = ../../../../.. 
+ +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) + +TEST_PROGS := test_raw_filter.sh + +TEST_GEN_FILES := test_raw_filter + +include ../../lib.mk diff --git a/tools/testing/selftests/net/can/config b/tools/testing/selftests/net/can/config new file mode 100644 index 000000000000..188f79796670 --- /dev/null +++ b/tools/testing/selftests/net/can/config @@ -0,0 +1,3 @@ +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VCAN=m diff --git a/tools/testing/selftests/net/can/test_raw_filter.c b/tools/testing/selftests/net/can/test_raw_filter.c new file mode 100644 index 000000000000..bb8ae8854273 --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.c @@ -0,0 +1,405 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) +/* + * Copyright (c) 2011 Volkswagen Group Electronic Research + * All rights reserved. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> + +#include <sys/types.h> +#include <sys/socket.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <net/if.h> +#include <linux/if.h> + +#include <linux/can.h> +#include <linux/can/raw.h> + +#include "kselftest_harness.h" + +#define ID 0x123 + +char CANIF[IFNAMSIZ]; + +static int send_can_frames(int sock, int testcase) +{ + struct can_frame frame; + + frame.can_dlc = 1; + frame.data[0] = testcase; + + frame.can_id = ID; + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + frame.can_id = (ID | CAN_EFF_FLAG | CAN_RTR_FLAG); + if (write(sock, &frame, sizeof(frame)) < 0) + goto write_err; + + return 0; + +write_err: + perror("write"); + return 1; +} + +FIXTURE(can_filters) { + int sock; +}; + +FIXTURE_SETUP(can_filters) +{ + struct sockaddr_can addr; + struct ifreq ifr; + int recv_own_msgs = 1; + int s, 
ret; + + s = socket(PF_CAN, SOCK_RAW, CAN_RAW); + ASSERT_GE(s, 0) + TH_LOG("failed to create CAN_RAW socket: %d", errno); + + strncpy(ifr.ifr_name, CANIF, sizeof(ifr.ifr_name)); + ret = ioctl(s, SIOCGIFINDEX, &ifr); + ASSERT_GE(ret, 0) + TH_LOG("failed SIOCGIFINDEX: %d", errno); + + addr.can_family = AF_CAN; + addr.can_ifindex = ifr.ifr_ifindex; + + setsockopt(s, SOL_CAN_RAW, CAN_RAW_RECV_OWN_MSGS, + &recv_own_msgs, sizeof(recv_own_msgs)); + + ret = bind(s, (struct sockaddr *)&addr, sizeof(addr)); + ASSERT_EQ(ret, 0) + TH_LOG("failed bind socket: %d", errno); + + self->sock = s; +} + +FIXTURE_TEARDOWN(can_filters) +{ + close(self->sock); +} + +FIXTURE_VARIANT(can_filters) { + int testcase; + canid_t id; + canid_t mask; + int exp_num_rx; + canid_t exp_flags[]; +}; + +/* Receive all frames when filtering for the ID in standard frame format */ +FIXTURE_VARIANT_ADD(can_filters, base) { + .testcase = 1, + .id = ID, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_eff) { + .testcase = 2, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore RTR flag in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_rtr) { + .testcase = 3, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Ignore EFF and RTR flags in filter ID if not covered by filter mask */ +FIXTURE_VARIANT_ADD(can_filters, base_effrtr) { + .testcase = 4, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK, + .exp_num_rx = 4, + .exp_flags = { + 0, + CAN_RTR_FLAG, + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF 
frames when expecting no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff) { + .testcase = 5, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_eff) { + .testcase = 6, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF frames when expecting no EFF flag, ignoring RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_rtr) { + .testcase = 7, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF frames when filter id and filter mask include EFF flag, + * ignoring RTR flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_eff_effrtr) { + .testcase = 8, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_EFF_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr) { + .testcase = 9, + .id = ID, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive no remote frames when filtering for no RTR flag, ignoring EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_eff) { + .testcase = 10, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + 0, + CAN_EFF_FLAG, + }, +}; + +/* Receive only remote frames when filter includes RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_rtr) { + .testcase = 11, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive 
only remote frames when filter includes RTR flag, ignoring EFF + * flag + */ +FIXTURE_VARIANT_ADD(can_filters, filter_rtr_effrtr) { + .testcase = 12, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_RTR_FLAG, + .exp_num_rx = 2, + .exp_flags = { + CAN_RTR_FLAG, + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no flags */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr) { + .testcase = 13, + .id = ID, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF but no RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_eff) { + .testcase = 14, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* Receive only SFF remote frame when filtering for RTR but no EFF flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_rtr) { + .testcase = 15, + .id = ID | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_RTR_FLAG, + }, +}; + +/* Receive only EFF remote frame when filtering for EFF and RTR flag */ +FIXTURE_VARIANT_ADD(can_filters, filter_effrtr_effrtr) { + .testcase = 16, + .id = ID | CAN_EFF_FLAG | CAN_RTR_FLAG, + .mask = CAN_SFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG | CAN_RTR_FLAG, + }, +}; + +/* Receive only SFF data frame when filtering for no EFF flag and no RTR flag + * but based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff) { + .testcase = 17, + .id = ID, + .mask = CAN_EFF_MASK | CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + 0, + }, +}; + +/* Receive only EFF data frame when filtering for EFF flag and no RTR flag but + * based on EFF mask + */ +FIXTURE_VARIANT_ADD(can_filters, eff_eff) { + .testcase = 18, + .id = ID | CAN_EFF_FLAG, + .mask = CAN_EFF_MASK | 
CAN_EFF_FLAG | CAN_RTR_FLAG, + .exp_num_rx = 1, + .exp_flags = { + CAN_EFF_FLAG, + }, +}; + +/* This test verifies that the raw CAN filters work, by checking if only frames + * with the expected set of flags are received. For each test case, the given + * filter (id and mask) is added and four CAN frames are sent with every + * combination of set/unset EFF/RTR flags. + */ +TEST_F(can_filters, test_filter) +{ + struct can_filter rfilter; + int ret; + + rfilter.can_id = variant->id; + rfilter.can_mask = variant->mask; + setsockopt(self->sock, SOL_CAN_RAW, CAN_RAW_FILTER, + &rfilter, sizeof(rfilter)); + + TH_LOG("filters: can_id = 0x%08X can_mask = 0x%08X", + rfilter.can_id, rfilter.can_mask); + + ret = send_can_frames(self->sock, variant->testcase); + ASSERT_EQ(ret, 0) + TH_LOG("failed to send CAN frames"); + + for (int i = 0; i <= variant->exp_num_rx; i++) { + struct can_frame frame; + struct timeval tv = { + .tv_sec = 0, + .tv_usec = 50000, /* 50ms timeout */ + }; + fd_set rdfs; + + FD_ZERO(&rdfs); + FD_SET(self->sock, &rdfs); + + ret = select(self->sock + 1, &rdfs, NULL, NULL, &tv); + ASSERT_GE(ret, 0) + TH_LOG("failed select for frame %d, err: %d)", i, errno); + + ret = FD_ISSET(self->sock, &rdfs); + if (i == variant->exp_num_rx) { + ASSERT_EQ(ret, 0) + TH_LOG("too many frames received"); + } else { + ASSERT_NE(ret, 0) + TH_LOG("too few frames received"); + + ret = read(self->sock, &frame, sizeof(frame)); + ASSERT_GE(ret, 0) + TH_LOG("failed to read frame %d, err: %d", i, errno); + + TH_LOG("rx: can_id = 0x%08X rx = %d", frame.can_id, i); + + ASSERT_EQ(ID, frame.can_id & CAN_SFF_MASK) + TH_LOG("received wrong can_id"); + ASSERT_EQ(variant->testcase, frame.data[0]) + TH_LOG("received wrong test case"); + + ASSERT_EQ(frame.can_id & ~CAN_ERR_MASK, + variant->exp_flags[i]) + TH_LOG("received unexpected flags"); + } + } +} + +int main(int argc, char **argv) +{ + char *ifname = getenv("CANIF"); + + if (!ifname) { + printf("CANIF environment variable must contain the 
test interface\n"); + return KSFT_FAIL; + } + + strncpy(CANIF, ifname, sizeof(CANIF) - 1); + + return test_harness_run(argc, argv); +} diff --git a/tools/testing/selftests/net/can/test_raw_filter.sh b/tools/testing/selftests/net/can/test_raw_filter.sh new file mode 100755 index 000000000000..276d6c06ac95 --- /dev/null +++ b/tools/testing/selftests/net/can/test_raw_filter.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_raw_filter +" + +net_dir=$(dirname $0)/.. +source $net_dir/lib.sh + +export CANIF=${CANIF:-"vcan0"} +BITRATE=${BITRATE:-500000} + +setup() +{ + if [[ $CANIF == vcan* ]]; then + ip link add name $CANIF type vcan || exit $ksft_skip + else + ip link set dev $CANIF type can bitrate $BITRATE || exit $ksft_skip + fi + ip link set dev $CANIF up + pwd +} + +cleanup() +{ + ip link set dev $CANIF down + if [[ $CANIF == vcan* ]]; then + ip link delete $CANIF + fi +} + +test_raw_filter() +{ + ./test_raw_filter + check_err $? + log_test "test_raw_filter" +} + +trap cleanup EXIT +setup + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/cmsg_ip.sh b/tools/testing/selftests/net/cmsg_ip.sh new file mode 100755 index 000000000000..b55680e081ad --- /dev/null +++ b/tools/testing/selftests/net/cmsg_ip.sh @@ -0,0 +1,187 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +IP4=172.16.0.1/24 +TGT4=172.16.0.2 +IP6=2001:db8:1::1/64 +TGT6=2001:db8:1::2 +TMPF=$(mktemp --suffix ".pcap") + +cleanup() +{ + rm -f $TMPF + cleanup_ns $NS +} + +trap cleanup EXIT + +tcpdump -h | grep immediate-mode >> /dev/null +if [ $? 
-ne 0 ]; then + echo "SKIP - tcpdump with --immediate-mode option required" + exit $ksft_skip +fi + +# Namespaces +setup_ns NS +NSEXE="ip netns exec $NS" + +$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null + +# Connectivity +ip -netns $NS link add type dummy +ip -netns $NS link set dev dummy0 up +ip -netns $NS addr add $IP4 dev dummy0 +ip -netns $NS addr add $IP6 dev dummy0 + +# Test +BAD=0 +TOTAL=0 + +check_result() { + ((TOTAL++)) + if [ $1 -ne $2 ]; then + echo " Case $3 returned $1, expected $2" + ((BAD++)) + fi +} + +# IPV6_DONTFRAG +for ovr in setsock cmsg both diff; do + for df in 0 1; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-F $df" + [ $ovr == "cmsg" ] && m="-f $df" + [ $ovr == "both" ] && m="-F $df -f $df" + [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" + + $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 + check_result $? $df "DONTFRAG $prot $ovr" + done + done +done + +# IP_TOS + IPV6_TCLASS + +test_dscp() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r TOS=0x10 + local -r TOS2=0x20 + local -r ECN=0x3 + + ip $IPVER -netns $NS rule add tos $TOS lookup 300 + ip $IPVER -netns $NS route add table 300 prohibit any + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-C" + [ $ovr == "cmsg" ] && m="-c" + [ $ovr == "both" ] && m="-C $((TOS2)) -c" + [ $ovr == "diff" ] && m="-C $((TOS )) -c" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + check_result $? 
0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS2)) $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $TOS2" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + + [ $ovr == "both" ] && m="-C $((TOS )) -c" + [ $ovr == "diff" ] && m="-C $((TOS2)) -c" + + # Match prohibit rule: expect failure + $NSEXE ./cmsg_sender $IPVER -p $p $m $((TOS)) -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection" + + # Match prohibit rule: IPv4 masks ECN: expect failure + if [[ "$IPVER" == "-4" ]]; then + $NSEXE ./cmsg_sender $IPVER -p $p $m "$((TOS | ECN))" -s $TGT 1234 + check_result $? 1 "$MATCH $prot $ovr - rejection (ECN)" + fi + done + done +} + +test_dscp -4 $TGT4 tos +test_dscp -6 $TGT6 class + +# IP_TTL + IPV6_HOPLIMIT +test_ttl_hoplimit() { + local -r IPVER=$1 + local -r TGT=$2 + local -r MATCH=$3 + + local -r LIM=4 + + for ovr in setsock cmsg both diff; do + for p in u U i r; do + [ $p == "u" ] && prot=UDP + [ $p == "U" ] && prot=UDP + [ $p == "i" ] && prot=ICMP + [ $p == "r" ] && prot=RAW + + [ $ovr == "setsock" ] && m="-L" + [ $ovr == "cmsg" ] && m="-l" + [ $ovr == "both" ] && m="-L $LIM -l" + [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" + + $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & + BG=$! + sleep 0.05 + + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + check_result $? 0 "$MATCH $prot $ovr - pass" + + while [ -d /proc/$BG ]; do + $NSEXE ./cmsg_sender $IPVER -p $p $m $LIM $TGT 1234 + done + + tcpdump -r $TMPF -v 2>&1 | grep "$MATCH $LIM[^0-9]" >> /dev/null + check_result $? 0 "$MATCH $prot $ovr - packet data" + rm $TMPF + done + done +} + +test_ttl_hoplimit -4 $TGT4 ttl +test_ttl_hoplimit -6 $TGT6 hlim + +# IPV6 exthdr +for p in u U i r; do + # Very basic "does it crash" test + for h in h d r; do + $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 + check_result $? 
0 "ExtHdr $prot $ovr - pass" + done +done + +# Summary +if [ $BAD -ne 0 ]; then + echo "FAIL - $BAD/$TOTAL cases failed" + exit 1 +else + echo "OK" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh deleted file mode 100755 index 8bc23fb4c82b..000000000000 --- a/tools/testing/selftests/net/cmsg_ipv6.sh +++ /dev/null @@ -1,154 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -source lib.sh - -IP6=2001:db8:1::1/64 -TGT6=2001:db8:1::2 -TMPF=$(mktemp --suffix ".pcap") - -cleanup() -{ - rm -f $TMPF - cleanup_ns $NS -} - -trap cleanup EXIT - -tcpdump -h | grep immediate-mode >> /dev/null -if [ $? -ne 0 ]; then - echo "SKIP - tcpdump with --immediate-mode option required" - exit $ksft_skip -fi - -# Namespaces -setup_ns NS -NSEXE="ip netns exec $NS" - -$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null - -# Connectivity -ip -netns $NS link add type dummy -ip -netns $NS link set dev dummy0 up -ip -netns $NS addr add $IP6 dev dummy0 - -# Test -BAD=0 -TOTAL=0 - -check_result() { - ((TOTAL++)) - if [ $1 -ne $2 ]; then - echo " Case $3 returned $1, expected $2" - ((BAD++)) - fi -} - -# IPV6_DONTFRAG -for ovr in setsock cmsg both diff; do - for df in 0 1; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-F $df" - [ $ovr == "cmsg" ] && m="-f $df" - [ $ovr == "both" ] && m="-F $df -f $df" - [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df" - - $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234 - check_result $? 
$df "DONTFRAG $prot $ovr" - done - done -done - -# IPV6_TCLASS -TOS=0x10 -TOS2=0x20 - -ip -6 -netns $NS rule add tos $TOS lookup 300 -ip -6 -netns $NS route add table 300 prohibit any - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-C" - [ $ovr == "cmsg" ] && m="-c" - [ $ovr == "both" ] && m="-C $((TOS2)) -c" - [ $ovr == "diff" ] && m="-C $((TOS )) -c" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - check_result $? 0 "TCLASS $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null - check_result $? 0 "TCLASS $prot $ovr - packet data" - rm $TMPF - - [ $ovr == "both" ] && m="-C $((TOS )) -c" - [ $ovr == "diff" ] && m="-C $((TOS2)) -c" - - $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234 - check_result $? 1 "TCLASS $prot $ovr - rejection" - done -done - -# IPV6_HOPLIMIT -LIM=4 - -for ovr in setsock cmsg both diff; do - for p in u i r; do - [ $p == "u" ] && prot=UDP - [ $p == "i" ] && prot=ICMP - [ $p == "r" ] && prot=RAW - - [ $ovr == "setsock" ] && m="-L" - [ $ovr == "cmsg" ] && m="-l" - [ $ovr == "both" ] && m="-L $LIM -l" - [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l" - - $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null & - BG=$! - sleep 0.05 - - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - check_result $? 0 "HOPLIMIT $prot $ovr - pass" - - while [ -d /proc/$BG ]; do - $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234 - done - - tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null - check_result $? 
0 "HOPLIMIT $prot $ovr - packet data" - rm $TMPF - done -done - -# IPV6 exthdr -for p in u i r; do - # Very basic "does it crash" test - for h in h d r; do - $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234 - check_result $? 0 "ExtHdr $prot $ovr - pass" - done -done - -# Summary -if [ $BAD -ne 0 ]; then - echo "FAIL - $BAD/$TOTAL cases failed" - exit 1 -else - echo "OK" - exit 0 -fi diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c index 876c2db02a63..67a72b1a2f3d 100644 --- a/tools/testing/selftests/net/cmsg_sender.c +++ b/tools/testing/selftests/net/cmsg_sender.c @@ -16,7 +16,7 @@ #include <linux/udp.h> #include <sys/socket.h> -#include "../kselftest.h" +#include "kselftest.h" enum { ERN_SUCCESS = 0, @@ -33,6 +33,7 @@ enum { ERN_RECVERR, ERN_CMSG_RD, ERN_CMSG_RCV, + ERN_SEND_MORE, }; struct option_cmsg_u32 { @@ -46,6 +47,7 @@ struct options { const char *service; unsigned int size; unsigned int num_pkt; + bool msg_more; struct { unsigned int mark; unsigned int dontfrag; @@ -59,6 +61,7 @@ struct options { unsigned int proto; } sock; struct option_cmsg_u32 mark; + struct option_cmsg_u32 priority; struct { bool ena; unsigned int delay; @@ -71,7 +74,7 @@ struct options { struct option_cmsg_u32 tclass; struct option_cmsg_u32 hlimit; struct option_cmsg_u32 exthdr; - } v6; + } cmsg; } opt = { .size = 13, .num_pkt = 1, @@ -93,21 +96,24 @@ static void __attribute__((noreturn)) cs_usage(const char *bin) "\t\t-S send() size\n" "\t\t-4/-6 Force IPv4 / IPv6 only\n" "\t\t-p prot Socket protocol\n" - "\t\t (u = UDP (default); i = ICMP; r = RAW)\n" + "\t\t (u = UDP (default); i = ICMP; r = RAW;\n" + "\t\t U = UDP with MSG_MORE)\n" "\n" "\t\t-m val Set SO_MARK with given value\n" "\t\t-M val Set SO_MARK via setsockopt\n" + "\t\t-P val Set SO_PRIORITY via setsockopt\n" + "\t\t-Q val Set SO_PRIORITY via cmsg\n" "\t\t-d val Set SO_TXTIME with given delay (usec)\n" "\t\t-t Enable time stamp reporting\n" "\t\t-f val Set don't fragment via 
cmsg\n" "\t\t-F val Set don't fragment via setsockopt\n" - "\t\t-c val Set TCLASS via cmsg\n" - "\t\t-C val Set TCLASS via setsockopt\n" - "\t\t-l val Set HOPLIMIT via cmsg\n" - "\t\t-L val Set HOPLIMIT via setsockopt\n" + "\t\t-c val Set TOS/TCLASS via cmsg\n" + "\t\t-C val Set TOS/TCLASS via setsockopt\n" + "\t\t-l val Set TTL/HOPLIMIT via cmsg\n" + "\t\t-L val Set TTL/HOPLIMIT via setsockopt\n" "\t\t-H type Add an IPv6 header option\n" - "\t\t (h = HOP; d = DST; r = RTDST)" - ""); + "\t\t (h = HOP; d = DST; r = RTDST)\n" + "\n"); exit(ERN_HELP); } @@ -115,7 +121,7 @@ static void cs_parse_args(int argc, char *argv[]) { int o; - while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) { + while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) { switch (o) { case 's': opt.silent_send = true; @@ -130,8 +136,11 @@ static void cs_parse_args(int argc, char *argv[]) opt.sock.family = AF_INET6; break; case 'p': - if (*optarg == 'u' || *optarg == 'U') { + if (*optarg == 'u') { opt.sock.proto = IPPROTO_UDP; + } else if (*optarg == 'U') { + opt.sock.proto = IPPROTO_UDP; + opt.msg_more = true; } else if (*optarg == 'i' || *optarg == 'I') { opt.sock.proto = IPPROTO_ICMP; } else if (*optarg == 'r') { @@ -148,6 +157,10 @@ static void cs_parse_args(int argc, char *argv[]) opt.mark.ena = true; opt.mark.val = atoi(optarg); break; + case 'Q': + opt.priority.ena = true; + opt.priority.val = atoi(optarg); + break; case 'M': opt.sockopt.mark = atoi(optarg); break; @@ -162,37 +175,37 @@ static void cs_parse_args(int argc, char *argv[]) opt.ts.ena = true; break; case 'f': - opt.v6.dontfrag.ena = true; - opt.v6.dontfrag.val = atoi(optarg); + opt.cmsg.dontfrag.ena = true; + opt.cmsg.dontfrag.val = atoi(optarg); break; case 'F': opt.sockopt.dontfrag = atoi(optarg); break; case 'c': - opt.v6.tclass.ena = true; - opt.v6.tclass.val = atoi(optarg); + opt.cmsg.tclass.ena = true; + opt.cmsg.tclass.val = atoi(optarg); break; case 'C': opt.sockopt.tclass = 
atoi(optarg); break; case 'l': - opt.v6.hlimit.ena = true; - opt.v6.hlimit.val = atoi(optarg); + opt.cmsg.hlimit.ena = true; + opt.cmsg.hlimit.val = atoi(optarg); break; case 'L': opt.sockopt.hlimit = atoi(optarg); break; case 'H': - opt.v6.exthdr.ena = true; + opt.cmsg.exthdr.ena = true; switch (optarg[0]) { case 'h': - opt.v6.exthdr.val = IPV6_HOPOPTS; + opt.cmsg.exthdr.val = IPV6_HOPOPTS; break; case 'd': - opt.v6.exthdr.val = IPV6_DSTOPTS; + opt.cmsg.exthdr.val = IPV6_DSTOPTS; break; case 'r': - opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS; + opt.cmsg.exthdr.val = IPV6_RTHDRDSTOPTS; break; default: printf("Error: hdr type: %s\n", optarg); @@ -253,11 +266,21 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, SOL_SOCKET, SO_MARK, &opt.mark); ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass); - ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, - SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit); + SOL_SOCKET, SO_PRIORITY, &opt.priority); + + if (opt.sock.family == AF_INET) { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TOS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IP, IP_TTL, &opt.cmsg.hlimit); + } else { + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_DONTFRAG, &opt.cmsg.dontfrag); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_TCLASS, &opt.cmsg.tclass); + ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len, + SOL_IPV6, IPV6_HOPLIMIT, &opt.cmsg.hlimit); + } if (opt.txtime.ena) { __u64 txtime; @@ -288,14 +311,14 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz) *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE; } - if (opt.v6.exthdr.ena) { + if (opt.cmsg.exthdr.ena) { cmsg = (struct cmsghdr *)(cbuf + cmsg_len); cmsg_len += CMSG_SPACE(8); if (cbuf_sz < cmsg_len) 
error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small"); cmsg->cmsg_level = SOL_IPV6; - cmsg->cmsg_type = opt.v6.exthdr.val; + cmsg->cmsg_type = opt.cmsg.exthdr.val; cmsg->cmsg_len = CMSG_LEN(8); *(__u64 *)CMSG_DATA(cmsg) = 0; } @@ -396,23 +419,35 @@ static void ca_set_sockopts(int fd) setsockopt(fd, SOL_SOCKET, SO_MARK, &opt.sockopt.mark, sizeof(opt.sockopt.mark))) error(ERN_SOCKOPT, errno, "setsockopt SO_MARK"); - if (opt.sockopt.dontfrag && - setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, - &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); - if (opt.sockopt.tclass && - setsockopt(fd, SOL_IPV6, IPV6_TCLASS, - &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); - if (opt.sockopt.hlimit && - setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, - &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) - error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); if (opt.sockopt.priority && setsockopt(fd, SOL_SOCKET, SO_PRIORITY, &opt.sockopt.priority, sizeof(opt.sockopt.priority))) error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY"); + if (opt.sock.family == AF_INET) { + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IP, IP_TOS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TOS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IP, IP_TTL, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + error(ERN_SOCKOPT, errno, "setsockopt IP_TTL"); + } else { + if (opt.sockopt.dontfrag && + setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG, + &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG"); + if (opt.sockopt.tclass && + setsockopt(fd, SOL_IPV6, IPV6_TCLASS, + &opt.sockopt.tclass, sizeof(opt.sockopt.tclass))) + error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS"); + if (opt.sockopt.hlimit && + setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS, + &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit))) + 
error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT"); + } + if (opt.txtime.ena) { struct sock_txtime so_txtime = { .clockid = CLOCK_MONOTONIC, @@ -456,7 +491,8 @@ int main(int argc, char *argv[]) if (err) { fprintf(stderr, "Can't resolve address [%s]:%s\n", opt.host, opt.service); - return ERN_SOCK_CREATE; + err = ERN_SOCK_CREATE; + goto err_free_buff; } if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP) @@ -465,8 +501,8 @@ int main(int argc, char *argv[]) fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto); if (fd < 0) { fprintf(stderr, "Can't open socket: %s\n", strerror(errno)); - freeaddrinfo(ai); - return ERN_RESOLVE; + err = ERN_RESOLVE; + goto err_free_info; } if (opt.sock.proto == IPPROTO_ICMP) { @@ -502,7 +538,7 @@ int main(int argc, char *argv[]) cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf)); for (i = 0; i < opt.num_pkt; i++) { - err = sendmsg(fd, &msg, 0); + err = sendmsg(fd, &msg, opt.msg_more ? MSG_MORE : 0); if (err < 0) { if (!opt.silent_send) fprintf(stderr, "send failed: %s\n", strerror(errno)); @@ -513,6 +549,14 @@ int main(int argc, char *argv[]) err = ERN_SEND_SHORT; goto err_out; } + if (opt.msg_more) { + err = write(fd, NULL, 0); + if (err < 0) { + fprintf(stderr, "send more: %s\n", strerror(errno)); + err = ERN_SEND_MORE; + goto err_out; + } + } } err = ERN_SUCCESS; @@ -531,6 +575,9 @@ int main(int argc, char *argv[]) err_out: close(fd); +err_free_info: freeaddrinfo(ai); +err_free_buff: + free(buf); return err; } diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh new file mode 100755 index 000000000000..ee07d8653262 --- /dev/null +++ b/tools/testing/selftests/net/cmsg_so_priority.sh @@ -0,0 +1,151 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +readonly KSFT_SKIP=4 + +IP4=192.0.2.1/24 +TGT4=192.0.2.2 +TGT4_RAW=192.0.2.3 +IP6=2001:db8::1/64 +TGT6=2001:db8::2 +TGT6_RAW=2001:db8::3 +PORT=1234 +TOTAL_TESTS=0 +FAILED_TESTS=0 + +if ! 
command -v jq &> /dev/null; then + echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2 + exit "$KSFT_SKIP" +fi + +check_result() { + ((TOTAL_TESTS++)) + if [ "$1" -ne 0 ]; then + ((FAILED_TESTS++)) + fi +} + +cleanup() +{ + cleanup_ns $NS +} + +trap cleanup EXIT + +setup_ns NS + +create_filter() { + local handle=$1 + local vlan_prio=$2 + local ip_type=$3 + local proto=$4 + local dst_ip=$5 + local ip_proto + + if [[ "$proto" == "u" ]]; then + ip_proto="udp" + elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then + ip_proto="icmp" + elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then + ip_proto="icmpv6" + fi + + tc -n $NS filter add dev dummy1 \ + egress pref 1 handle "$handle" proto 802.1q \ + flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \ + dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \ + action pass +} + +ip -n $NS link set dev lo up +ip -n $NS link add name dummy1 up type dummy + +ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \ + egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 + +ip -n $NS address add $IP4 dev dummy1.10 +ip -n $NS address add $IP6 dev dummy1.10 nodad + +ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647' + +ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 +ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \ + dev dummy1.10 + +tc -n $NS qdisc add dev dummy1 clsact + +FILTER_COUNTER=10 + +for i in 4 6; do + for proto in u i r; do + echo "Test IPV$i, prot: $proto" + for priority in {0..7}; do + if [[ $i == 4 && $proto == "r" ]]; then + TGT=$TGT4_RAW + elif [[ $i == 6 && $proto == "r" ]]; then + TGT=$TGT6_RAW + elif [ $i == 4 ]; then + TGT=$TGT4 + else + TGT=$TGT6 + fi + + handle="${FILTER_COUNTER}${priority}" + + create_filter $handle $priority ipv$i $proto $TGT 
+ + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + + if [[ $pkts == 0 ]]; then + check_result 0 + else + echo "prio $priority: expected 0, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -Q $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 1 ]]; then + check_result 0 + else + echo "prio $priority -Q: expected 1, got $pkts" + check_result 1 + fi + + ip netns exec $NS ./cmsg_sender -$i -P $priority \ + -p $proto $TGT $PORT + + pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \ + | jq ".[] | select(.options.handle == ${handle}) | \ + .options.actions[0].stats.packets") + if [[ $pkts == 2 ]]; then + check_result 0 + else + echo "prio $priority -P: expected 2, got $pkts" + check_result 1 + fi + done + FILTER_COUNTER=$((FILTER_COUNTER + 10)) + done +done + +if [ $FAILED_TESTS -ne 0 ]; then + echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed" + exit 1 +else + echo "OK - All $TOTAL_TESTS tests passed" + exit 0 +fi diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh index 1d7e756644bc..478af0aefa97 100755 --- a/tools/testing/selftests/net/cmsg_time.sh +++ b/tools/testing/selftests/net/cmsg_time.sh @@ -34,13 +34,28 @@ BAD=0 TOTAL=0 check_result() { + local ret=$1 + local got=$2 + local exp=$3 + local case=$4 + local xfail=$5 + local xf= + local inc= + + if [ "$xfail" == "xfail" ]; then + xf="(XFAIL)" + inc=0 + else + inc=1 + fi + ((TOTAL++)) - if [ $1 -ne 0 ]; then - echo " Case $4 returned $1, expected 0" - ((BAD++)) + if [ $ret -ne 0 ]; then + echo " Case $case returned $ret, expected 0 $xf" + ((BAD+=inc)) elif [ "$2" != "$3" ]; then - echo " Case $4 returned '$2', expected '$3'" - ((BAD++)) + echo " Case $case returned '$got', expected '$exp' 
$xf" + ((BAD+=inc)) fi } @@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do awk '/SND/ { if ($3 > 1000) print "OK"; }') check_result $? "$ts" "OK" "$prot - TXTIME abs" - [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000 + [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail - ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay | + ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 | awk '/SND/ {snd=$3} /SCHED/ {sch=$3} - END { if (snd - sch > '$((delay/2))') print "OK"; - else print snd, "-", sch, "<", '$((delay/2))'; }') - check_result $? "$ts" "OK" "$prot - TXTIME rel" + END { if (snd - sch > 500) print "OK"; + else print snd, "-", sch, "<", 500; }') + check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail done done diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 5b9baf708950..1e1f253118f5 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -1,109 +1,130 @@ -CONFIG_USER_NS=y -CONFIG_NET_NS=y +CONFIG_AMT=m +CONFIG_BAREUDP=m CONFIG_BONDING=m CONFIG_BPF_SYSCALL=y -CONFIG_TEST_BPF=m -CONFIG_NUMA=y -CONFIG_RPS=y -CONFIG_SYSFS=y -CONFIG_PROC_SYSCTL=y -CONFIG_NET_VRF=y -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_IPV6=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_VETH=y -CONFIG_NET_IPVTI=y -CONFIG_IPV6_VTI=y -CONFIG_DUMMY=y -CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_BRIDGE=y +CONFIG_BRIDGE_VLAN_FILTERING=y +CONFIG_CAN=m +CONFIG_CAN_DEV=m +CONFIG_CAN_VXCAN=m +CONFIG_CRYPTO_ARIA=y CONFIG_CRYPTO_CHACHA20POLY1305=m -CONFIG_VLAN_8021Q=y +CONFIG_CRYPTO_SHA1=y +CONFIG_CRYPTO_SM4_GENERIC=y +CONFIG_DEBUG_INFO_BTF=y +CONFIG_DEBUG_INFO_BTF_MODULES=n +CONFIG_DUMMY=y CONFIG_GENEVE=m CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y -CONFIG_NET_FOU=y -CONFIG_NET_FOU_IP_TUNNELS=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NF_CONNTRACK=m -CONFIG_IPV6_MROUTE=y -CONFIG_IPV6_SIT=y -CONFIG_IP_DCCP=m -CONFIG_NF_NAT=m +CONFIG_IP6_NF_FILTER=m 
CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MANGLE=m +CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_RAW=m +CONFIG_IP6_NF_TARGET_REJECT=m +CONFIG_IP_NF_FILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_MANGLE=m +CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m +CONFIG_IP_NF_TARGET_REJECT=m CONFIG_IP_NF_TARGET_TTL=m +CONFIG_IP_SCTP=m +CONFIG_IPV6=y CONFIG_IPV6_GRE=m +CONFIG_IPV6_ILA=m +CONFIG_IPV6_IOAM6_LWTUNNEL=y +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MULTIPLE_TABLES=y +CONFIG_IPV6_RPL_LWTUNNEL=y CONFIG_IPV6_SEG6_LWTUNNEL=y +CONFIG_IPV6_SIT=y +CONFIG_IPV6_VTI=y +CONFIG_IPVLAN=m +CONFIG_KALLSYMS=y +CONFIG_L2TP=m CONFIG_L2TP_ETH=m CONFIG_L2TP_IP=m -CONFIG_L2TP=m CONFIG_L2TP_V3=y CONFIG_MACSEC=m CONFIG_MACVLAN=y CONFIG_MACVTAP=y CONFIG_MPLS=y +CONFIG_MPLS_IPTUNNEL=m +CONFIG_MPLS_ROUTING=m CONFIG_MPTCP=y -CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_IPV6=y -CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_NAT=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_BPF=m +CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m CONFIG_NET_CLS_U32=m -CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NETDEVSIM=m +CONFIG_NET_DROP_MONITOR=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_POLICY=m +CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XT_TARGET_HL=m +CONFIG_NET_FOU=y +CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPIP=y +CONFIG_NET_IPVTI=y +CONFIG_NETKIT=y +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_FQ_CODEL=m CONFIG_NET_SCH_HTB=m -CONFIG_NET_SCH_FQ=m -CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_INGRESS=m CONFIG_NET_SCH_NETEM=y 
CONFIG_NET_SCH_PRIO=m -CONFIG_NFT_COMPAT=m +CONFIG_NET_VRF=y +CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_OVS=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_NAT=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_IPV4=y +CONFIG_NF_TABLES_IPV6=y +CONFIG_NFT_COMPAT=m +CONFIG_NFT_NAT=m +CONFIG_NUMA=y CONFIG_OPENVSWITCH=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_OPENVSWITCH_GRE=m CONFIG_OPENVSWITCH_VXLAN=m +CONFIG_PROC_SYSCTL=y CONFIG_PSAMPLE=m +CONFIG_RPS=y +CONFIG_SYSFS=y CONFIG_TCP_MD5SIG=y CONFIG_TEST_BLACKHOLE_DEV=m -CONFIG_KALLSYMS=y +CONFIG_TEST_BPF=m CONFIG_TLS=m CONFIG_TRACEPOINTS=y -CONFIG_NET_DROP_MONITOR=m -CONFIG_NETDEVSIM=m -CONFIG_MPLS_ROUTING=m -CONFIG_MPLS_IPTUNNEL=m -CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_CLS_FLOWER=m -CONFIG_NET_ACT_TUNNEL_KEY=m -CONFIG_NET_ACT_MIRRED=m -CONFIG_BAREUDP=m -CONFIG_IPV6_IOAM6_LWTUNNEL=y -CONFIG_CRYPTO_SM4_GENERIC=y -CONFIG_AMT=m CONFIG_TUN=y +CONFIG_USER_NS=y +CONFIG_VETH=y +CONFIG_VLAN_8021Q=y CONFIG_VXLAN=m -CONFIG_IP_SCTP=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_CRYPTO_ARIA=y CONFIG_XFRM_INTERFACE=m CONFIG_XFRM_USER=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RPFILTER=m diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh index 7c4818c971fc..507d0a82f5f0 100755 --- a/tools/testing/selftests/net/drop_monitor_tests.sh +++ b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -77,7 +77,7 @@ sw_drops_test() rm ${dir}/packets.pcap - { kill %% && wait %%; } 2>/dev/null + kill_process %% timeout 5 dwdump -o sw -w ${dir}/packets.pcap (( $(tshark -r ${dir}/packets.pcap \ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c index 16e457c2f877..adf8dd0b5e0b 100644 --- a/tools/testing/selftests/net/epoll_busy_poll.c +++ b/tools/testing/selftests/net/epoll_busy_poll.c @@ -23,7 +23,7 @@ #include <sys/ioctl.h> #include <sys/socket.h> -#include "../kselftest_harness.h" +#include 
"kselftest_harness.h" /* if the headers haven't been updated, we need to define some things */ #if !defined(EPOLL_IOC_TYPE) diff --git a/tools/testing/selftests/net/fcnal-ipv4.sh b/tools/testing/selftests/net/fcnal-ipv4.sh new file mode 100755 index 000000000000..82f9c867c3e8 --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv4.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv4 diff --git a/tools/testing/selftests/net/fcnal-ipv6.sh b/tools/testing/selftests/net/fcnal-ipv6.sh new file mode 100755 index 000000000000..ab1fc7aa3caf --- /dev/null +++ b/tools/testing/selftests/net/fcnal-ipv6.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t ipv6 diff --git a/tools/testing/selftests/net/fcnal-other.sh b/tools/testing/selftests/net/fcnal-other.sh new file mode 100755 index 000000000000..a840cf80b32e --- /dev/null +++ b/tools/testing/selftests/net/fcnal-other.sh @@ -0,0 +1,2 @@ +#!/bin/sh +./fcnal-test.sh -t other diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 386ebd829df5..844a580ae74e 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -189,7 +189,7 @@ show_hint() kill_procs() { killall nettest ping ping6 >/dev/null 2>&1 - sleep 1 + slowwait 2 sh -c 'test -z "$(pgrep '"'^(nettest|ping|ping6)$'"')"' } set_ping_group() @@ -424,6 +424,8 @@ create_ns() ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0 + ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.accept_dad=0 } # create veth pair to connect namespaces and apply addresses. 
@@ -875,7 +877,7 @@ ipv4_tcp_md5_novrf() # basic use case log_start run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -883,7 +885,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -891,7 +893,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -899,7 +901,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Client address does not match address configured with password" @@ -910,7 +912,7 @@ ipv4_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -918,7 +920,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 
2 "MD5: Prefix config, client uses wrong password" @@ -926,7 +928,7 @@ ipv4_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -943,7 +945,7 @@ ipv4_tcp_md5() # basic use case log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -951,7 +953,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -959,7 +961,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Client uses wrong password" @@ -967,7 +969,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -978,7 +980,7 @@ ipv4_tcp_md5() # client in prefix log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 
0 "MD5: VRF: Prefix config" @@ -986,7 +988,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -994,7 +996,7 @@ ipv4_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -1005,14 +1007,14 @@ ipv4_tcp_md5() log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -1020,7 +1022,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 
2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1028,21 +1030,21 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -1050,7 +1052,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -1058,7 +1060,7 @@ ipv4_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} & run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW} log_test $? 
2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -1082,14 +1084,14 @@ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex() log_start show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection" log_start show_hint "Binding both the socket and the key is not required but it works" run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection" } @@ -1103,25 +1105,25 @@ test_ipv4_md5_vrf__global_server__bind_ifindex0() log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection" log_start run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW} log_test $? 
0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection" @@ -1193,7 +1195,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -1201,7 +1203,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1221,13 +1223,13 @@ ipv4_tcp_novrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1249,7 +1251,7 @@ ipv4_tcp_novrf() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1257,7 +1259,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1266,7 +1268,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1274,7 +1276,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -0 ${a} -d ${NSA_DEV} log_test_addr ${a} $? 
0 "Global server, device client, local connection" @@ -1283,7 +1285,7 @@ ipv4_tcp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -1291,7 +1293,7 @@ ipv4_tcp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" @@ -1323,19 +1325,19 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server" log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1352,7 +1354,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -1374,14 +1376,14 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 
0 "Global server" log_start show_hint "client socket should be bound to VRF" run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -1396,7 +1398,7 @@ ipv4_tcp_vrf() log_start show_hint "client socket should be bound to device" run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1406,7 +1408,7 @@ ipv4_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since client is not bound to VRF" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 1 "Global server, local connection" done @@ -1418,13 +1420,13 @@ ipv4_tcp_vrf() do log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" log_start run_cmd_nsb nettest -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" @@ -1443,7 +1445,7 @@ ipv4_tcp_vrf() do log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -1451,26 +1453,26 @@ ipv4_tcp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" log_start show_hint "Should fail 'No route to host' since client is out of VRF scope" run_cmd nettest -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} log_test_addr ${a} $? 
1 "VRF server, unbound client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local connection" log_start run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" } @@ -1509,7 +1511,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -1522,7 +1524,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server" @@ -1533,31 +1535,31 @@ ipv4_udp_novrf() do log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U log_test_addr ${a} $? 
0 "Client, device bind via IP_UNICAST_IF, with connect()" @@ -1580,7 +1582,7 @@ ipv4_udp_novrf() do log_start run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -1588,7 +1590,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -1597,7 +1599,7 @@ ipv4_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -1605,25 +1607,25 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection" log_start run_cmd nettest -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U log_test_addr ${a} $? 
0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1636,28 +1638,28 @@ ipv4_udp_novrf() log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 2 "Global server, device client, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail since addresses on loopback are out of device scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" @@ -1667,7 +1669,7 @@ ipv4_udp_novrf() a=${NSA_IP} log_start run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -1709,19 +1711,19 @@ ipv4_udp_vrf() log_start show_hint "Fails because ingress is in a VRF and global server is disabled" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 
1 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1733,7 +1735,7 @@ ipv4_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is out of scope" run_cmd nettest -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local connection" done @@ -1741,26 +1743,26 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection" a=${NSA_IP} log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -1775,19 +1777,19 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 
0 "Global server" log_start run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "VRF server" log_start run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" @@ -1802,13 +1804,13 @@ ipv4_udp_vrf() # log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${VRF} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "VRF client" log_start run_cmd_nsb nettest -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -d ${NSA_DEV} -D -r ${NSB_IP} -1 ${NSA_IP} log_test $? 0 "Enslaved device client" @@ -1829,31 +1831,31 @@ ipv4_udp_vrf() a=${NSA_IP} log_start run_cmd nettest -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 
0 "Enslaved device server, device client, local conn" @@ -1861,7 +1863,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" done @@ -1870,7 +1872,7 @@ ipv4_udp_vrf() do log_start run_cmd nettest -s -D -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -2093,7 +2095,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2107,7 +2109,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2120,7 +2122,7 @@ ipv4_rt() a=${NSA_IP} log_start run_cmd nettest ${varg} -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2134,7 +2136,7 @@ ipv4_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2145,7 +2147,7 @@ ipv4_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP} & sleep 3 run_cmd ip link del ${VRF} @@ -2161,7 +2163,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2175,7 +2177,7 @@ ipv4_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 
run_cmd ip link del ${VRF} @@ -2189,7 +2191,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2200,7 +2202,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2211,7 +2213,7 @@ ipv4_rt() log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -2561,7 +2563,7 @@ ipv6_tcp_md5_novrf() # basic use case log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Single address config" @@ -2569,7 +2571,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Server no config, client uses password" @@ -2577,7 +2579,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Client uses wrong password" @@ -2585,7 +2587,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 
2 "MD5: Client address does not match address configured with password" @@ -2596,7 +2598,7 @@ ipv6_tcp_md5_novrf() # client in prefix log_start run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: Prefix config" @@ -2604,7 +2606,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: Prefix config, client uses wrong password" @@ -2612,7 +2614,7 @@ ipv6_tcp_md5_novrf() log_start show_hint "Should timeout due to MD5 mismatch" run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: Prefix config, client address not in configured prefix" } @@ -2629,7 +2631,7 @@ ipv6_tcp_md5() # basic use case log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Single address config" @@ -2637,7 +2639,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server does not have MD5 auth" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Server no config, client uses password" @@ -2645,7 +2647,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 
2 "MD5: VRF: Client uses wrong password" @@ -2653,7 +2655,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since server config differs from client" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Client address does not match address configured with password" @@ -2664,7 +2666,7 @@ ipv6_tcp_md5() # client in prefix log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config" @@ -2672,7 +2674,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client uses wrong password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password" @@ -2680,7 +2682,7 @@ ipv6_tcp_md5() log_start show_hint "Should timeout since client address is outside of prefix" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix" @@ -2691,14 +2693,14 @@ ipv6_tcp_md5() log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 
0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF" @@ -2706,7 +2708,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2714,21 +2716,21 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF" log_start run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 
0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF" @@ -2736,7 +2738,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in default VRF uses VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw" @@ -2744,7 +2746,7 @@ ipv6_tcp_md5() show_hint "Should timeout since client in VRF uses default VRF password" run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} & run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW} log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw" @@ -2772,7 +2774,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2793,7 +2795,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Client" done @@ -2802,7 +2804,7 @@ ipv6_tcp_novrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -2822,7 +2824,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -2830,7 +2832,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -0 ${a} log_test_addr ${a} $? 
0 "Device server, unbound client, local connection" @@ -2839,7 +2841,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "Device server, unbound client, local connection" done @@ -2847,7 +2849,7 @@ ipv6_tcp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" @@ -2856,7 +2858,7 @@ ipv6_tcp_novrf() log_start show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" done @@ -2865,7 +2867,7 @@ ipv6_tcp_novrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" done @@ -2898,7 +2900,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -2907,7 +2909,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2916,7 +2918,7 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 
0 "VRF server" @@ -2924,7 +2926,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -2943,7 +2945,7 @@ ipv6_tcp_vrf() log_start show_hint "Should fail 'Connection refused' since global server with VRF is disabled" run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, local connection" @@ -2964,7 +2966,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -2973,7 +2975,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -2982,13 +2984,13 @@ ipv6_tcp_vrf() a=${NSA_LINKIP6}%${NSB_DEV} log_start run_cmd nettest -6 -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "VRF server" @@ -2996,7 +2998,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3016,7 +3018,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails 'Connection refused' since client is not in VRF" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 
1 "Global server, local connection" done @@ -3029,7 +3031,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 0 "Client, VRF bind" done @@ -3038,7 +3040,7 @@ ipv6_tcp_vrf() log_start show_hint "Fails since VRF device does not allow linklocal addresses" run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} log_test_addr ${a} $? 1 "Client, VRF bind" @@ -3046,7 +3048,7 @@ ipv6_tcp_vrf() do log_start run_cmd_nsb nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 0 "Client, device bind" done @@ -3071,7 +3073,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local connection" done @@ -3079,7 +3081,7 @@ ipv6_tcp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "VRF server, device client, local connection" @@ -3087,13 +3089,13 @@ ipv6_tcp_vrf() log_start show_hint "Should fail since unbound client is out of VRF scope" run_cmd nettest -6 -s -I ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} log_test_addr ${a} $? 1 "VRF server, unbound client, local connection" log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a} log_test_addr ${a} $? 
0 "Device server, VRF client, local connection" @@ -3101,7 +3103,7 @@ ipv6_tcp_vrf() do log_start run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local connection" done @@ -3141,13 +3143,13 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server" done @@ -3155,7 +3157,7 @@ ipv6_udp_novrf() a=${NSA_LO_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" @@ -3165,7 +3167,7 @@ ipv6_udp_novrf() #log_start #show_hint "Should fail since loopback address is out of scope" #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - #sleep 1 + #wait_local_port_listen ${NSA} 12345 udp #run_cmd_nsb nettest -6 -D -r ${a} #log_test_addr ${a} $? 1 "Device server" @@ -3185,25 +3187,25 @@ ipv6_udp_novrf() do log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C -0 ${NSA_IP6} log_test_addr ${a} $? 
0 "Client, device send via cmsg" log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP6} log_test_addr ${a} $? 0 "Client, device bind via IPV6_UNICAST_IF" @@ -3225,7 +3227,7 @@ ipv6_udp_novrf() do log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -0 ${a} -1 ${a} log_test_addr ${a} $? 0 "Global server, local connection" done @@ -3233,7 +3235,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Device server, unbound client, local connection" @@ -3242,7 +3244,7 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'Connection refused' since address is out of device scope" run_cmd nettest -6 -s -D -I ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Device server, local connection" done @@ -3250,19 +3252,19 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -C -r ${a} log_test_addr ${a} $? 0 "Global server, device send via cmsg, local connection" log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -S -r ${a} log_test_addr ${a} $? 
0 "Global server, device client via IPV6_UNICAST_IF, local connection" @@ -3271,28 +3273,28 @@ ipv6_udp_novrf() log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} log_test_addr ${a} $? 1 "Global server, device client, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -C log_test_addr ${a} $? 1 "Global server, device send via cmsg, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection" log_start show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()" done @@ -3300,7 +3302,7 @@ ipv6_udp_novrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3314,7 +3316,7 @@ ipv6_udp_novrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 
0 "UDP in - LLA to GUA" @@ -3338,7 +3340,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 1 "Global server" done @@ -3347,7 +3349,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3356,7 +3358,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3378,7 +3380,7 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 1 "Global server, VRF client, local conn" done @@ -3387,7 +3389,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" done @@ -3396,25 +3398,25 @@ ipv6_udp_vrf() log_start show_hint "Should fail 'Connection refused' since global server is disabled" run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 1 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 
0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn" @@ -3429,7 +3431,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Global server" done @@ -3438,7 +3440,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "VRF server" done @@ -3447,7 +3449,7 @@ ipv6_udp_vrf() do log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${a} log_test_addr ${a} $? 0 "Enslaved device server" done @@ -3465,7 +3467,7 @@ ipv6_udp_vrf() # log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${NSB_IP6} log_test $? 0 "VRF client" @@ -3476,7 +3478,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_IP6} log_test $? 0 "Enslaved device client" @@ -3491,13 +3493,13 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 
0 "Global server, VRF client, local conn" #log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3505,13 +3507,13 @@ ipv6_udp_vrf() a=${VRF_IP6} log_start run_cmd nettest -6 -D -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Global server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "VRF server, VRF client, local conn" @@ -3527,25 +3529,25 @@ ipv6_udp_vrf() a=${NSA_IP6} log_start run_cmd nettest -6 -D -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Global server, device client, local conn" log_start run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "VRF server, device client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${VRF} -r ${a} log_test_addr ${a} $? 0 "Device server, VRF client, local conn" log_start run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} log_test_addr ${a} $? 0 "Device server, device client, local conn" @@ -3557,7 +3559,7 @@ ipv6_udp_vrf() # link local addresses log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -d ${NSB_DEV} -r ${NSA_LINKIP6} log_test $? 
0 "Global server, linklocal IP" @@ -3568,7 +3570,7 @@ ipv6_udp_vrf() log_start run_cmd_nsb nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSB_LINKIP6} log_test $? 0 "Enslaved device client, linklocal IP" @@ -3579,7 +3581,7 @@ ipv6_udp_vrf() log_start run_cmd nettest -6 -D -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd nettest -6 -D -d ${NSA_DEV} -r ${NSA_LINKIP6} log_test $? 0 "Enslaved device client, local conn - linklocal IP" @@ -3592,7 +3594,7 @@ ipv6_udp_vrf() run_cmd_nsb ip -6 ro add ${NSA_IP6}/128 dev ${NSB_DEV} log_start run_cmd nettest -6 -s -D & - sleep 1 + wait_local_port_listen ${NSA} 12345 udp run_cmd_nsb nettest -6 -D -r ${NSA_IP6} log_test $? 0 "UDP in - LLA to GUA" @@ -3667,7 +3669,7 @@ ipv6_addr_bind_novrf() # when it really should not a=${NSA_LO_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address" } @@ -3724,7 +3726,7 @@ ipv6_addr_bind_vrf() # passes when it really should not a=${VRF_IP6} log_start - show_hint "Tecnically should fail since address is not on device but kernel allows" + show_hint "Technically should fail since address is not on device but kernel allows" run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b log_test_addr ${a} $? 
0 "TCP socket bind to VRF address with device bind" @@ -3771,7 +3773,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3785,7 +3787,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3799,7 +3801,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${varg} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3814,7 +3816,7 @@ ipv6_rt() # log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3825,7 +3827,7 @@ ipv6_rt() log_start run_cmd_nsb nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSB} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${NSB_IP6} & sleep 3 run_cmd ip link del ${VRF} @@ -3842,7 +3844,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3856,7 +3858,7 @@ ipv6_rt() do log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${VRF} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3869,7 +3871,7 @@ ipv6_rt() a=${NSA_IP6} log_start run_cmd nettest ${varg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3880,7 +3882,7 @@ ipv6_rt() log_start run_cmd nettest ${varg} -I ${VRF} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3891,7 +3893,7 @@ ipv6_rt() log_start 
run_cmd nettest ${varg} -I ${NSA_DEV} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} & sleep 3 run_cmd ip link del ${VRF} @@ -3950,7 +3952,7 @@ netfilter_tcp_reset() do log_start run_cmd nettest -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -3968,7 +3970,7 @@ netfilter_icmp() do log_start run_cmd nettest ${arg} -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4007,7 +4009,7 @@ netfilter_tcp6_reset() do log_start run_cmd nettest -6 -s & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 -r ${a} log_test_addr ${a} $? 1 "Global server, reject with TCP-reset on Rx" done @@ -4025,7 +4027,7 @@ netfilter_icmp6() do log_start run_cmd nettest -6 -s ${arg} & - sleep 1 + wait_local_port_listen ${NSA} 12345 tcp run_cmd_nsb nettest -6 ${arg} -r ${a} log_test_addr ${a} $? 1 "Global ${stype} server, Rx reject icmp-port-unreach" done @@ -4221,12 +4223,12 @@ use_case_snat_on_vrf() run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF} run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port} log_test $? 0 "IPv4 TCP connection over VRF with SNAT" run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} & - sleep 1 + wait_local_port_listen ${NSB} ${port} tcp run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port} log_test $? 
0 "IPv6 TCP connection over VRF with SNAT" @@ -4272,6 +4274,7 @@ EOF TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter" TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter" TESTS_OTHER="use_cases" +# note: each TEST_ group needs a dedicated runner, e.g. fcnal-ipv4.sh PAUSE_ON_FAIL=no PAUSE=no @@ -4302,16 +4305,11 @@ elif [ "$TESTS" = "ipv4" ]; then TESTS="$TESTS_IPV4" elif [ "$TESTS" = "ipv6" ]; then TESTS="$TESTS_IPV6" +elif [ "$TESTS" = "other" ]; then + TESTS="$TESTS_OTHER" fi -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" declare -i nfail=0 declare -i nsuccess=0 diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh index d5e3abb8658c..9931a1e36e3d 100755 --- a/tools/testing/selftests/net/fdb_flush.sh +++ b/tools/testing/selftests/net/fdb_flush.sh @@ -583,7 +583,7 @@ vxlan_test_flush_by_remote_attributes() $IP link del dev vx10 $IP link add name vx10 type vxlan dstport "$VXPORT" external - # For multicat FDB entries, the VXLAN driver stores a linked list of + # For multicast FDB entries, the VXLAN driver stores a linked list of # remotes for a given key. Verify that only the expected remotes are # flushed. 
multicast_fdb_entries_add diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh new file mode 100755 index 000000000000..0b8a2465dd04 --- /dev/null +++ b/tools/testing/selftests/net/fdb_notify.sh @@ -0,0 +1,96 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_dup_bridge + test_dup_vxlan_self + test_dup_vxlan_master + test_dup_macvlan_self + test_dup_macvlan_master +" + +do_test_dup() +{ + local op=$1; shift + local what=$1; shift + local tmpf + + RET=0 + + tmpf=$(mktemp) + defer rm "$tmpf" + + defer_scope_push + bridge monitor fdb &> "$tmpf" & + defer kill_process $! + + sleep 0.5 + bridge fdb "$op" 00:11:22:33:44:55 vlan 1 "$@" + sleep 0.5 + defer_scope_pop + + local count=$(grep -c -e 00:11:22:33:44:55 $tmpf) + ((count == 1)) + check_err $? "Got $count notifications, expected 1" + + log_test "$what $op: Duplicate notifications" +} + +test_dup_bridge() +{ + adf_ip_link_add br up type bridge vlan_filtering 1 + do_test_dup add "bridge" dev br self + do_test_dup del "bridge" dev br self +} + +test_dup_vxlan_self() +{ + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br + + do_test_dup add "vxlan" dev vx self dst 192.0.2.1 + do_test_dup del "vxlan" dev vx self dst 192.0.2.1 +} + +test_dup_vxlan_master() +{ + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + adf_ip_link_set_master vx br + + do_test_dup add "vxlan master" dev vx master + do_test_dup del "vxlan master" dev vx master +} + +test_dup_macvlan_self() +{ + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up link dd type macvlan mode passthru + + do_test_dup add "macvlan self" dev mv self + do_test_dup del "macvlan self" dev mv self +} + +test_dup_macvlan_master() +{ + adf_ip_link_add br up type bridge vlan_filtering 1 + adf_ip_link_add dd up type dummy + adf_ip_link_add mv up 
link dd type macvlan mode passthru + adf_ip_link_set_master mv br + + do_test_dup add "macvlan master" dev mv self + do_test_dup del "macvlan master" dev mv self +} + +cleanup() +{ + defer_scopes_cleanup +} + +trap cleanup EXIT +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index ac0b2c6a5761..2b0a90581e2f 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -76,9 +76,16 @@ log_test() printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" + if [[ $rc -eq $ksft_skip ]]; then + [[ $ret -eq 0 ]] && ret=$ksft_skip + nskip=$((nskip+1)) + printf "TEST: %-60s [SKIP]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + fi + if [ "$VERBOSE" = "1" ]; then echo " rc=$rc, expected $expected" fi @@ -460,8 +467,8 @@ ipv6_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 63 via 2001:db8:91::4" - run_cmd "$IP nexthop add id 64 via 2001:db8:91::5" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::5 dev veth1" run_cmd "$IP nexthop add id 103 group 63/64 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" @@ -487,6 +494,26 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb" log_test $? 2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 70 via 2001:db8:91::2 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 fdb" + log_test $? 
0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 71 group 70 fdb" + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 70 via 2001:db8:91::2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 72 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 73 group 72" + run_cmd "$IP nexthop replace id 72 via 2001:db8:91::2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -540,15 +567,15 @@ ipv4_fdb_grp_fcnal() log_test $? 0 "Get Fdb nexthop group by id" # fdb nexthop group can only contain fdb nexthops - run_cmd "$IP nexthop add id 14 via 172.16.1.2" - run_cmd "$IP nexthop add id 15 via 172.16.1.3" + run_cmd "$IP nexthop add id 14 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 15 via 172.16.1.3 dev veth1" run_cmd "$IP nexthop add id 103 group 14/15 fdb" log_test $? 2 "Fdb Nexthop group with non-fdb nexthops" # Non fdb nexthop group can not contain fdb nexthops run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb" run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb" - run_cmd "$IP nexthop add id 104 group 14/15" + run_cmd "$IP nexthop add id 104 group 16/17" log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops" # fdb nexthop cannot have blackhole @@ -567,6 +594,26 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb" log_test $? 
2 "Fdb Nexthop with encap" + # Replace FDB nexthop to non-FDB and vice versa + run_cmd "$IP nexthop add id 18 via 172.16.1.2 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 0 "Replace FDB nexthop to non-FDB nexthop" + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 fdb" + log_test $? 0 "Replace non-FDB nexthop to FDB nexthop" + + # Replace FDB nexthop address while in a group + run_cmd "$IP nexthop add id 19 group 18 fdb" + run_cmd "$IP nexthop replace id 18 via 172.16.1.3 fdb" + log_test $? 0 "Replace FDB nexthop address while in a group" + + # Cannot replace FDB nexthop to non-FDB and vice versa while in a group + run_cmd "$IP nexthop replace id 18 via 172.16.1.2 dev veth1" + log_test $? 2 "Replace FDB nexthop to non-FDB nexthop while in a group" + run_cmd "$IP nexthop add id 20 via 172.16.1.2 dev veth1" + run_cmd "$IP nexthop add id 21 group 20" + run_cmd "$IP nexthop replace id 20 via 172.16.1.2 fdb" + log_test $? 2 "Replace non-FDB nexthop to FDB nexthop while in a group" + run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100" run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self" log_test $? 0 "Fdb mac add with nexthop group" @@ -575,7 +622,7 @@ ipv4_fdb_grp_fcnal() run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self" log_test $? 255 "Fdb mac add with nexthop" - run_cmd "$IP ro add 172.16.0.0/22 nhid 15" + run_cmd "$IP ro add 172.16.0.0/22 nhid 16" log_test $? 2 "Route add with fdb nexthop" run_cmd "$IP ro add 172.16.0.0/22 nhid 103" @@ -736,7 +783,7 @@ ipv6_fcnal() run_cmd "$IP nexthop add id 52 via 2001:db8:92::3" log_test $? 2 "Create nexthop - gw only" - # gw is not reachable throught given dev + # gw is not reachable through given dev run_cmd "$IP nexthop add id 53 via 2001:db8:3::3 dev veth1" log_test $? 
2 "Create nexthop - invalid gw+dev combination" @@ -923,6 +970,29 @@ ipv6_grp_fcnal() ipv6_grp_refs log_test $? 0 "Nexthop group replace refcounts" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP" + rc=$? + else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_res_grp_fcnal() @@ -987,6 +1057,31 @@ ipv6_res_grp_fcnal() check_nexthop_bucket "list id 102" \ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62" log_test $? 0 "Nexthop buckets updated after replace - nECMP" + + # + # 16-bit weights. + # + run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1" + run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1" + run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1" + run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 dev veth1" + run_cmd "$IP nexthop add id 66 dev veth1" + + run_cmd "$IP nexthop add id 103 group 62,1000 type resilient buckets 32" + if [[ $? == 0 ]]; then + local GRP="id 103 group 62,254/63,255/64,256/65,257/66,65535 $(: + )type resilient buckets 32 idle_timer 0 $(: + )unbalanced_timer 0" + run_cmd "$IP nexthop replace $GRP" + check_nexthop "id 103" "$GRP unbalanced_time 0" + rc=$? 
+ else + rc=$ksft_skip + fi + + $IP nexthop flush >/dev/null 2>&1 + + log_test $rc 0 "16-bit weights" } ipv6_fcnal_runtime() @@ -2475,6 +2570,7 @@ done if [ "$TESTS" != "none" ]; then printf "\nTests passed: %3d\n" ${nsuccess} printf "Tests failed: %3d\n" ${nfail} + printf "Tests skipped: %2d\n" ${nskip} fi exit $ret diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh index 7c01f58a20de..5fbdd2a0b537 100755 --- a/tools/testing/selftests/net/fib_rule_tests.sh +++ b/tools/testing/selftests/net/fib_rule_tests.sh @@ -35,18 +35,13 @@ log_test() local expected=$2 local msg="$3" - $IP rule show | grep -q l3mdev - if [ $? -eq 0 ]; then - msg="$msg (VRF)" - fi - if [ ${rc} -eq ${expected} ]; then nsuccess=$((nsuccess+1)) - printf "\n TEST: %-60s [ OK ]\n" "${msg}" + printf " TEST: %-60s [ OK ]\n" "${msg}" else ret=1 nfail=$((nfail+1)) - printf "\n TEST: %-60s [FAIL]\n" "${msg}" + printf " TEST: %-60s [FAIL]\n" "${msg}" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" @@ -56,39 +51,6 @@ log_test() fi } -log_section() -{ - echo - echo "######################################################################" - echo "TEST SECTION: $*" - echo "######################################################################" -} - -check_nettest() -{ - if which nettest > /dev/null 2>&1; then - return 0 - fi - - # Add the selftest directory to PATH if not already done - if [ "${SELFTEST_PATH}" = "" ]; then - SELFTEST_PATH="$(dirname $0)" - PATH="${PATH}:${SELFTEST_PATH}" - - # Now retry with the new path - if which nettest > /dev/null 2>&1; then - return 0 - fi - - if [ "${ret}" -eq 0 ]; then - ret="${ksft_skip}" - fi - echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')" - fi - - return 1 -} - setup() { set -e @@ -187,12 +149,17 @@ fib_rule6_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local 
nomatch_description="$5" $IP -6 rule add $match table $RTABLE $IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule6 check: $description" + $IP -6 route get $GW_IP6 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule6 check: $nomatch_description" + fib_rule6_del_by_pref "$match" log_test $? 0 "rule6 del by pref: $description" } @@ -213,18 +180,27 @@ fib_rule6_test_reject() fib_rule6_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv6 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink match="oif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" match="from $SRC_IP6 iif $DEV" - fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -238,44 +214,174 @@ fib_rule6_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule6_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. 
+ match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule6_test_match_n_redirect "$match" \ + "from $SRC_IP6 iif $DEV $getmatch" \ + "from $SRC_IP6 iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule6_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? -eq 0 ]; then match="sport 666 dport 777" - fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? 
-eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match" + getnomatch="ipproto udp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto ipv6-icmp" - fib_rule6_test_match_n_redirect "$match" "$match" "ipproto ipv6-icmp match" + getnomatch="ipproto tcp" + fib_rule6_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto ipv6-icmp match" \ + "ipproto ipv6-tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? -eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP6 iif $DEV tos 0xfc" + getnomatch="from $SRC_IP6 iif $DEV tos 0xf4" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" + fi + + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? 
-eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP6 iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP6 iif $DEV tos $tosnomatch" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi + + fib_check_iproute_support "flowlabel" "flowlabel" + if [ $? -eq 0 ]; then + match="flowlabel 0xfffff" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel redirect to table" \ + "flowlabel no redirect to table" + + match="flowlabel 0xfffff" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel redirect to table" \ + "iif flowlabel no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="flowlabel 0xfffff" + getnomatch="flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "flowlabel masked redirect to table" \ + "flowlabel masked no redirect to table" + + match="flowlabel 0x08000/0x08000" + getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff" + getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif flowlabel masked redirect to table" \ + "iif flowlabel masked no redirect to table" + fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? 
-eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP6 iif vrf0" + getmatch="from $SRC_IP6 iif $DEV" + getnomatch="from $SRC_IP6 iif lo" + fib_rule6_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" fi } fib_rule6_vrf_test() { setup_vrf - fib_rule6_test + fib_rule6_test "- with VRF" cleanup_vrf } @@ -285,10 +391,8 @@ fib_rule6_connect_test() { local dsfield - if ! check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv6 FIB rule connect tests" setup_peer $IP -6 rule add dsfield 0x04 table $RTABLE_PEER @@ -306,7 +410,45 @@ fib_rule6_connect_test() log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \ + -Q 0x20 -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield udp no connect (dsfield 0x20)" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dsfield tcp no connect (dsfield 0x20)" + $IP -6 rule del dsfield 0x04 table $RTABLE_PEER + + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -6 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 0 "rule6 dscp udp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 
0 "rule6 dscp tcp connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp udp no connect" + + nettest -q -6 -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 2001:db8::1:11 -r 2001:db8::1:11 + log_test $? 1 "rule6 dscp tcp no connect" + + $IP -6 rule del dscp 0x3f table $RTABLE_PEER + cleanup_peer } @@ -326,12 +468,17 @@ fib_rule4_test_match_n_redirect() { local match="$1" local getmatch="$2" - local description="$3" + local getnomatch="$3" + local description="$4" + local nomatch_description="$5" $IP rule add $match table $RTABLE $IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE" log_test $? 0 "rule4 check: $description" + $IP route get $GW_IP4 $getnomatch 2>&1 | grep -q "table $RTABLE" + log_test $? 1 "rule4 check: $nomatch_description" + fib_rule4_del_by_pref "$match" log_test $? 0 "rule4 del by pref: $description" } @@ -352,23 +499,28 @@ fib_rule4_test_reject() fib_rule4_test() { + local ext_name=$1; shift + local getnomatch local getmatch local match local cnt + echo + echo "IPv4 FIB rule tests $ext_name" + # setup the fib rule redirect route $IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink match="oif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "oif redirect to table" "oif no redirect to table" - # need enable forwarding and disable rp_filter temporarily as all the - # addresses are in the same subnet and egress device == ingress device. 
ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1 - ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0 match="from $SRC_IP iif $DEV" - fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table" - ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0 + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$match" "$getnomatch" \ + "iif redirect to table" "iif no redirect to table" # Reject dsfield (tos) options which have ECN bits set for cnt in $(seq 1 3); do @@ -382,44 +534,144 @@ fib_rule4_test() # Using option 'tos' instead of 'dsfield' as old iproute2 # versions don't support 'dsfield' in ip rule show. getmatch="tos $cnt" + getnomatch="tos 0x20" fib_rule4_test_match_n_redirect "$match" "$getmatch" \ - "$getmatch redirect to table" + "$getnomatch" "$getmatch redirect to table" \ + "$getnomatch no redirect to table" + done + + # Re-test TOS matching, but with input routes since they are handled + # differently from output routes. + match="tos 0x10" + for cnt in "0x10" "0x11" "0x12" "0x13"; do + getmatch="tos $cnt" + getnomatch="tos 0x20" + fib_rule4_test_match_n_redirect "$match" \ + "from $SRC_IP iif $DEV $getmatch" \ + "from $SRC_IP iif $DEV $getnomatch" \ + "iif $getmatch redirect to table" \ + "iif $getnomatch no redirect to table" done match="fwmark 0x64" getmatch="mark 0x64" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table" + getnomatch="mark 0x63" + fib_rule4_test_match_n_redirect "$match" "$getmatch" "$getnomatch" \ + "fwmark redirect to table" "fwmark no redirect to table" fib_check_iproute_support "uidrange" "uid" if [ $? -eq 0 ]; then match="uidrange 100-100" getmatch="uid 100" - fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table" + getnomatch="uid 101" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "uid redirect to table" \ + "uid no redirect to table" fi fib_check_iproute_support "sport" "sport" if [ $? 
-eq 0 ]; then match="sport 666 dport 777" - fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table" + getnomatch="sport 667 dport 778" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "sport and dport redirect to table" \ + "sport and dport no redirect to table" + + match="sport 100-200 dport 300-400" + getmatch="sport 100 dport 400" + getnomatch="sport 100 dport 401" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" \ + "sport and dport range redirect to table" \ + "sport and dport range no redirect to table" + fi + + ip rule help 2>&1 | grep sport | grep -q MASK + if [ $? -eq 0 ]; then + match="sport 0x0f00/0xff00 dport 0x000f/0x00ff" + getmatch="sport 0x0f11 dport 0x220f" + getnomatch="sport 0x1f11 dport 0x221f" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "sport and dport masked redirect to table" \ + "sport and dport masked no redirect to table" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto tcp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match" + getnomatch="ipproto udp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto tcp match" \ + "ipproto udp no match" fi fib_check_iproute_support "ipproto" "ipproto" if [ $? -eq 0 ]; then match="ipproto icmp" - fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match" + getnomatch="ipproto tcp" + fib_rule4_test_match_n_redirect "$match" "$match" \ + "$getnomatch" "ipproto icmp match" \ + "ipproto tcp no match" + fi + + fib_check_iproute_support "dscp" "tos" + if [ $? 
-eq 0 ]; then + match="dscp 0x3f" + getmatch="tos 0xfc" + getnomatch="tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp redirect to table" \ + "dscp no redirect to table" + + match="dscp 0x3f" + getmatch="from $SRC_IP iif $DEV tos 0xfc" + getnomatch="from $SRC_IP iif $DEV tos 0xf4" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp redirect to table" \ + "iif dscp no redirect to table" + fi + + ip rule help 2>&1 | grep -q "DSCP\[/MASK\]" + if [ $? -eq 0 ]; then + match="dscp 0x0f/0x0f" + tosmatch=$(printf 0x"%x" $((0x1f << 2))) + tosnomatch=$(printf 0x"%x" $((0x1e << 2))) + getmatch="tos $tosmatch" + getnomatch="tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "dscp masked redirect to table" \ + "dscp masked no redirect to table" + + match="dscp 0x0f/0x0f" + getmatch="from $SRC_IP iif $DEV tos $tosmatch" + getnomatch="from $SRC_IP iif $DEV tos $tosnomatch" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "iif dscp masked redirect to table" \ + "iif dscp masked no redirect to table" + fi + + $IP link show dev $DEV | grep -q vrf0 + if [ $? -eq 0 ]; then + match="oif vrf0" + getmatch="oif $DEV" + getnomatch="oif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF oif redirect to table" \ + "VRF oif no redirect to table" + + match="from $SRC_IP iif vrf0" + getmatch="from $SRC_IP iif $DEV" + getnomatch="from $SRC_IP iif lo" + fib_rule4_test_match_n_redirect "$match" "$getmatch" \ + "$getnomatch" "VRF iif redirect to table" \ + "VRF iif no redirect to table" fi } fib_rule4_vrf_test() { setup_vrf - fib_rule4_test + fib_rule4_test "- with VRF" cleanup_vrf } @@ -429,10 +681,8 @@ fib_rule4_connect_test() { local dsfield - if ! 
check_nettest; then - echo "SKIP: Could not run test without nettest tool" - return - fi + echo + echo "IPv4 FIB rule connect tests" setup_peer $IP -4 rule add dsfield 0x04 table $RTABLE_PEER @@ -450,16 +700,46 @@ fib_rule4_connect_test() log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})" done + # Check that UDP and TCP connections fail when using a DS Field that + # does not match the previously configured FIB rule. + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield udp no connect (dsfield 0x20)" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0x20 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dsfield tcp no connect (dsfield 0x20)" + $IP -4 rule del dsfield 0x04 table $RTABLE_PEER - cleanup_peer -} -run_fibrule_tests() -{ - log_section "IPv4 fib rule" - fib_rule4_test - log_section "IPv6 fib rule" - fib_rule6_test + ip rule help 2>&1 | grep -q dscp + if [ $? -ne 0 ]; then + echo "SKIP: iproute2 iprule too old, missing dscp match" + cleanup_peer + return + fi + + $IP -4 rule add dscp 0x3f table $RTABLE_PEER + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp udp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xfc \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 0 "rule4 dscp tcp connect" + + nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp udp no connect" + + nettest -q -B -t 5 -N $testns -O $peerns -Q 0xf4 \ + -l 198.51.100.11 -r 198.51.100.11 + log_test $? 1 "rule4 dscp tcp no connect" + + $IP -4 rule del dscp 0x3f table $RTABLE_PEER + + cleanup_peer } ################################################################################ # usage @@ -495,6 +775,8 @@ if [ ! 
-x "$(command -v ip)" ]; then exit $ksft_skip fi +check_gen_prog "nettest" + # start clean cleanup &> /dev/null setup diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index 73895711cdf4..a88f797c549a 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -11,7 +11,8 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ - ipv4_mpath_list ipv6_mpath_list" + ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \ + fib6_ra_to_static" VERBOSE=0 PAUSE_ON_FAIL=no @@ -689,7 +690,7 @@ fib6_notify_test() log_test $ret 0 "ipv6 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% #rm errors.txt @@ -736,7 +737,7 @@ fib_notify_test() log_test $ret 0 "ipv4 route add notify" - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm errors.txt @@ -1085,6 +1086,35 @@ route_setup() set +e } +forwarding_cleanup() +{ + cleanup_ns $ns3 + + route_cleanup +} + +# extend route_setup with an ns3 reachable through ns2 over both devices +forwarding_setup() +{ + forwarding_cleanup + + route_setup + + setup_ns ns3 + + ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2 + ip -netns $ns3 link set veth5 up + ip -netns $ns2 link set veth6 up + + ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24 + ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24 + ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2 + + ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad + ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad + ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2 +} + # assumption is that basic add of a single path route works # otherwise just adding an address on an interface is 
broken ipv6_rt_add() @@ -1447,6 +1477,68 @@ ipv6_route_metrics_test() route_cleanup } +fib6_ra_to_static() +{ + setup + + echo + echo "Fib6 route promotion from RA-learned to static test" + set -e + + # ra6 is required for the test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return + fi + + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a prefix from veth2. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. Make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 2 $($IP -6 route list|grep expires|wc -l) || return + + # Configure static address on the same prefix + $IP -6 address add 2001:12::dead/64 dev veth1 nodad + + # On-link route won't expire anymore, default route still owned by RA + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + # Send a second RA message with a prefix from veth2. 
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -P 2001:12::/64\#LA\#120\#60 + sleep 1 + + # Expire is not back, on-link route is still static + check_rt_num 1 $($IP -6 route list |grep expires|wc -l) + + $IP -6 address del 2001:12::dead/64 dev veth1 nodad + + # Expire is back, on-link route is now owned by RA again + check_rt_num 2 $($IP -6 route list |grep expires|wc -l) + + log_test $ret 0 "ipv6 promote RA route to static" + + set +e + + cleanup &> /dev/null +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -1737,53 +1829,53 @@ ipv4_rt_dsfield() # DSCP 0x10 should match the specific route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \ - grep -q "via 172.16.103.2" + grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2" log_test $? 0 "IPv4 route with DSCP and ECN:CE" # Unknown DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 
0 "IPv4 route with unknown DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE" # Null DSCP should match the generic route, no matter the ECN bits $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT" $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)" $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)" $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \ - grep -q "via 172.16.101.2" + grep -q "172.16.102.0/24 via 172.16.101.2" log_test $? 
0 "IPv4 route with no DSCP and ECN:CE" } @@ -2328,7 +2420,7 @@ ipv4_mangle_test() $IP route del table 123 172.16.101.0/24 dev veth1 $IP rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup @@ -2386,7 +2478,7 @@ ipv6_mangle_test() $IP -6 route del table 123 2001:db8:101::/64 dev veth1 $IP -6 rule del pref 100 - { kill %% && wait %%; } 2>/dev/null + kill_process %% rm $tmp_file route_cleanup @@ -2531,9 +2623,6 @@ ipv4_mpath_list_test() run_cmd "ip -n $ns2 route add 203.0.113.0/24 nexthop via 172.16.201.2 nexthop via 172.16.202.2" run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0" - run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0" set +e local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]') @@ -2600,6 +2689,93 @@ ipv6_mpath_list_test() route_cleanup } +tc_set_flower_counter__saddr_syn() { + tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2" +} + +ip_mpath_balance_dep_check() +{ + if [ ! -x "$(command -v socat)" ]; then + echo "socat command not found. Skipping test" + return 1 + fi + + if [ ! -x "$(command -v jq)" ]; then + echo "jq command not found. 
Skipping test" + return 1 + fi +} + +ip_mpath_balance() { + local -r ipver=$1 + local -r daddr=$2 + local -r num_conn=20 + + for i in $(seq 1 $num_conn); do + ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null & + sleep 0.02 + echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000 + done + + local -r syn0="$(tc_get_flower_counter $ns1 veth1)" + local -r syn1="$(tc_get_flower_counter $ns1 veth3)" + local -r syns=$((syn0+syn1)) + + [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)" + + [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]] +} + +ipv4_mpath_balance_test() +{ + echo + echo "IPv4 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 172.16.105.1 \ + nexthop via 172.16.101.2 \ + nexthop via 172.16.103.2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv4.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1 + tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1 + + ip_mpath_balance -4 172.16.105.1 + + log_test $? 0 "IPv4 multipath loadbalance" + + forwarding_cleanup +} + +ipv6_mpath_balance_test() +{ + echo + echo "IPv6 multipath load balance test" + + ip_mpath_balance_dep_check || return 1 + forwarding_setup + + $IP route add 2001:db8:105::1\ + nexthop via 2001:db8:101::2 \ + nexthop via 2001:db8:103::2 + + ip netns exec $ns1 \ + sysctl -q -w net.ipv6.fib_multipath_hash_policy=1 + + tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1 + tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1 + + ip_mpath_balance -6 "[2001:db8:105::1]" + + log_test $? 
0 "IPv6 multipath loadbalance" + + forwarding_cleanup +} + ################################################################################ # usage @@ -2683,6 +2859,9 @@ do fib6_gc_test|ipv6_gc) fib6_gc_test;; ipv4_mpath_list) ipv4_mpath_list_test;; ipv6_mpath_list) ipv6_mpath_list_test;; + ipv4_mpath_balance) ipv4_mpath_balance_test;; + ipv6_mpath_balance) ipv6_mpath_balance_test;; + fib6_ra_to_static) fib6_ra_to_static;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 224346426ef2..ff4a00d91a26 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS := \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ + bridge_fdb_local_vlan_0.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ @@ -18,64 +21,64 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ gre_custom_multipath_hash.sh \ gre_inner_v4_multipath.sh \ gre_inner_v6_multipath.sh \ - gre_multipath_nh_res.sh \ - gre_multipath_nh.sh \ gre_multipath.sh \ + gre_multipath_nh.sh \ + gre_multipath_nh_res.sh \ ip6_forward_instats_vrf.sh \ ip6gre_custom_multipath_hash.sh \ + ip6gre_flat.sh \ ip6gre_flat_key.sh \ ip6gre_flat_keys.sh \ - ip6gre_flat.sh \ + ip6gre_hier.sh \ ip6gre_hier_key.sh \ ip6gre_hier_keys.sh \ - ip6gre_hier.sh \ ip6gre_inner_v4_multipath.sh \ ip6gre_inner_v6_multipath.sh \ + ipip_flat_gre.sh \ ipip_flat_gre_key.sh \ ipip_flat_gre_keys.sh \ - ipip_flat_gre.sh \ + ipip_hier_gre.sh \ ipip_hier_gre_key.sh \ ipip_hier_gre_keys.sh \ - ipip_hier_gre.sh \ lib_sh_test.sh \ local_termination.sh \ min_max_mtu.sh \ + mirror_gre.sh \ mirror_gre_bound.sh \ mirror_gre_bridge_1d.sh \ mirror_gre_bridge_1d_vlan.sh \ - mirror_gre_bridge_1q_lag.sh \ mirror_gre_bridge_1q.sh \ + mirror_gre_bridge_1q_lag.sh \ 
mirror_gre_changes.sh \ mirror_gre_flower.sh \ mirror_gre_lag_lacp.sh \ mirror_gre_neigh.sh \ mirror_gre_nh.sh \ - mirror_gre.sh \ - mirror_gre_vlan_bridge_1q.sh \ mirror_gre_vlan.sh \ + mirror_gre_vlan_bridge_1q.sh \ mirror_vlan.sh \ no_forwarding.sh \ pedit_dsfield.sh \ pedit_ip.sh \ pedit_l4port.sh \ - q_in_vni_ipv6.sh \ q_in_vni.sh \ + q_in_vni_ipv6.sh \ + router.sh \ router_bridge.sh \ router_bridge_1d.sh \ router_bridge_1d_lag.sh \ router_bridge_lag.sh \ + router_bridge_pvid_vlan_upper.sh \ router_bridge_vlan.sh \ router_bridge_vlan_upper.sh \ - router_bridge_pvid_vlan_upper.sh \ router_bridge_vlan_upper_pvid.sh \ router_broadcast.sh \ - router_mpath_nh_res.sh \ router_mpath_nh.sh \ + router_mpath_nh_res.sh \ router_mpath_seed.sh \ router_multicast.sh \ router_multipath.sh \ router_nh.sh \ - router.sh \ router_vid_1.sh \ sch_ets.sh \ sch_red.sh \ @@ -85,30 +88,34 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \ skbedit_priority.sh \ tc_actions.sh \ tc_chains.sh \ - tc_flower_router.sh \ tc_flower.sh \ - tc_flower_l2_miss.sh \ tc_flower_cfm.sh \ + tc_flower_l2_miss.sh \ tc_flower_port_range.sh \ + tc_flower_router.sh \ tc_mpls_l2vpn.sh \ tc_police.sh \ tc_shblocks.sh \ tc_tunnel_key.sh \ tc_vlan_modify.sh \ - vxlan_asymmetric_ipv6.sh \ vxlan_asymmetric.sh \ + vxlan_asymmetric_ipv6.sh \ + vxlan_bridge_1d.sh \ vxlan_bridge_1d_ipv6.sh \ - vxlan_bridge_1d_port_8472_ipv6.sh \ vxlan_bridge_1d_port_8472.sh \ - vxlan_bridge_1d.sh \ + vxlan_bridge_1d_port_8472_ipv6.sh \ + vxlan_bridge_1q.sh \ vxlan_bridge_1q_ipv6.sh \ - vxlan_bridge_1q_port_8472_ipv6.sh \ + vxlan_bridge_1q_mc_ul.sh \ vxlan_bridge_1q_port_8472.sh \ - vxlan_bridge_1q.sh \ + vxlan_bridge_1q_port_8472_ipv6.sh \ + vxlan_reserved.sh \ + vxlan_symmetric.sh \ vxlan_symmetric_ipv6.sh \ - vxlan_symmetric.sh +# end of TEST_PROGS -TEST_FILES := devlink_lib.sh \ +TEST_FILES := \ + devlink_lib.sh \ fib_offload_lib.sh \ forwarding.config.sample \ ip6gre_lib.sh \ @@ -123,9 +130,12 @@ TEST_FILES := devlink_lib.sh \ 
sch_ets_tests.sh \ sch_tbf_core.sh \ sch_tbf_etsprio.sh \ - tc_common.sh + tc_common.sh \ +# end of TEST_FILES TEST_INCLUDES := \ - ../lib.sh + $(wildcard ../lib/sh/*.sh) \ + ../lib.sh \ +# end of TEST_INCLUDES include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README index 7fdb6a9ca543..392a5a91ed37 100644 --- a/tools/testing/selftests/net/forwarding/README +++ b/tools/testing/selftests/net/forwarding/README @@ -6,7 +6,7 @@ to easily create and test complex environments. Unfortunately, these namespaces can not be used with actual switching ASICs, as their ports can not be migrated to other network namespaces -(NETIF_F_NETNS_LOCAL) and most of them probably do not support the +(dev->netns_immutable) and most of them probably do not support the L1-separation provided by namespaces. However, a similar kind of flexibility can be achieved by using VRFs and @@ -57,6 +57,21 @@ o Code shall be checked using ShellCheck [1] prior to submission. 1. https://www.shellcheck.net/ +Cleanups +-------- + +o lib.sh brings in defer.sh (by way of ../lib.sh) by default. Consider + making use of the defer primitive to schedule automatic cleanups. This + makes it harder to forget to remove a temporary netdevice, kill a running + process or perform other cleanup when the test script is interrupted. + +o When adding a helper that dirties the environment, but schedules all + necessary cleanups through defer, consider prefixing it adf_ for + consistency with lib.sh and ../lib.sh helpers. This serves as an + immediately visible bit of documentation about the helper API. + +o Definitely do the above for any new code in lib.sh, if practical. 
+ Customization ============= diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..522a5b1b046c --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,170 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" 192.0.2.1/28 +} + +h2_create() +{ + adf_simple_if_init "$h2" 192.0.2.2/28 +} + +switch_create() +{ + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + adf_ip_link_set_up br1 + + adf_ip_link_set_master "$swp1" br1 + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_master "$swp2" br1 + adf_ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + adf_vrf_prepare + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive and 
check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. 
+ RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh index 0760a34b7114..a21b7085da2e 100755 --- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh @@ -178,6 +178,22 @@ fdb_del() check_err $? "Failed to remove a FDB entry of type ${type}" } +check_fdb_n_learned_support() +{ + if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then + echo "SKIP: iproute2 too old, missing bridge max learned support" + exit $ksft_skip + fi + + ip link add dev br0 type bridge + local learned=$(fdb_get_n_learned) + ip link del dev br0 + if [ "$learned" == "null" ]; then + echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported." 
+ exit $ksft_skip + fi +} + check_accounting_one_type() { local type=$1 is_counted=$2 overrides_learned=$3 @@ -274,6 +290,8 @@ check_limit() done } +check_fdb_n_learned_support + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh new file mode 100755 index 000000000000..694de8ba97e4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_fdb_local_vlan_0.sh @@ -0,0 +1,387 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +-----------------------+ +-----------------------+ +# | H1 (vrf) | | H2 (vrf) | | H3 (vrf) | +# | + $h1 | | + $h2 | | + $h3 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | | | 192.0.2.18/28 | +# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | +# | | | | | | | | | +# +----|------------------+ +----|------------------+ +----|------------------+ +# | | | +# +----|-------------------------|-------------------------|------------------+ +# | +--|-------------------------|------------------+ | | +# | | + $swp1 + $swp2 | + $swp3 | +# | | | 192.0.2.17/28 | +# | | BR1 (802.1q) | 2001:db8:2::1/64 | +# | | 192.0.2.3/28 | | +# | | 2001:db8:1::3/64 | | +# | +-----------------------------------------------+ SW | +# +---------------------------------------------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. +#shellcheck disable=SC2034 # ... 
and global variables + +ALL_TESTS=" + test_d_no_sharing + test_d_sharing + test_q_no_sharing + test_q_sharing + test_addr_set +" + +NUM_NETIFS=6 +source lib.sh + +pMAC=00:11:22:33:44:55 +bMAC=00:11:22:33:44:66 +mMAC=00:11:22:33:44:77 +xMAC=00:11:22:33:44:88 + +host_create() +{ + local h=$1; shift + local ipv4=$1; shift + local ipv6=$1; shift + + adf_simple_if_init "$h" "$ipv4" "$ipv6" + adf_ip_route_add vrf "v$h" 192.0.2.16/28 nexthop via 192.0.2.3 + adf_ip_route_add vrf "v$h" 2001:db8:2::/64 nexthop via 2001:db8:1::3 +} + +h3_create() +{ + adf_simple_if_init "$h3" 192.0.2.18/28 2001:db8:2::2/64 + adf_ip_route_add vrf "v$h3" 192.0.2.0/28 nexthop via 192.0.2.17 + adf_ip_route_add vrf "v$h3" 2001:db8:1::/64 nexthop via 2001:db8:2::1 + + tc qdisc add dev "$h3" clsact + defer tc qdisc del dev "$h3" clsact + + tc filter add dev "$h3" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h3" ingress proto ip pref 104 + + tc qdisc add dev "$h2" clsact + defer tc qdisc del dev "$h2" clsact + + tc filter add dev "$h2" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port 4096 \ + action pass + defer tc filter del dev "$h2" ingress proto ip pref 104 +} + +switch_create() +{ + adf_ip_link_set_up "$swp1" + + adf_ip_link_set_up "$swp2" + + adf_ip_addr_add "$swp3" 192.0.2.17/28 + adf_ip_addr_add "$swp3" 2001:db8:2::1/64 + adf_ip_link_set_up "$swp3" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + adf_vrf_prepare + adf_forwarding_enable + + host_create "$h1" 192.0.2.1/28 2001:db8:1::1/64 + host_create "$h2" 192.0.2.2/28 2001:db8:1::2/64 + h3_create + + switch_create +} + +adf_bridge_configure() +{ + local dev + + adf_ip_addr_add br 192.0.2.3/28 + adf_ip_addr_add br 2001:db8:1::3/64 + + adf_bridge_vlan_add dev br vid 1 pvid untagged self + adf_bridge_vlan_add dev br vid 2 self + adf_bridge_vlan_add dev br 
vid 3 self + + for dev in "$swp1" "$swp2"; do + adf_ip_link_set_master "$dev" br + adf_bridge_vlan_add dev "$dev" vid 1 pvid untagged + adf_bridge_vlan_add dev "$dev" vid 2 + adf_bridge_vlan_add dev "$dev" vid 3 + done +} + +adf_bridge_create() +{ + local mac + + adf_ip_link_add br up type bridge vlan_default_pvid 0 "$@" + mac=$(mac_get br) + adf_bridge_configure + adf_ip_link_set_addr br "$mac" +} + +check_fdb_local_vlan_0_support() +{ + if adf_ip_link_add XXbr up type bridge vlan_filtering 1 \ + fdb_local_vlan_0 1 &>/dev/null; then + return 0 + fi + + log_test_skip "FDB sharing" \ + "iproute 2 or the kernel do not support fdb_local_vlan_0" +} + +check_mac_presence() +{ + local should_fail=$1; shift + local dev=$1; shift + local vlan=$1; shift + local mac + + mac=$(mac_get "$dev") + + if ((vlan == 0)); then + vlan=null + fi + + bridge -j fdb show dev "$dev" | + jq -e --arg mac "$mac" --argjson vlan "$vlan" \ + '.[] | select(.mac == $mac) | select(.vlan == $vlan)' > /dev/null + check_err_fail "$should_fail" $? "FDB dev $dev vid $vlan addr $mac exists" +} + +do_sharing_test() +{ + local should_fail=$1; shift + local what=$1; shift + local dev + + RET=0 + + for dev in "$swp1" "$swp2" br; do + check_mac_presence 0 "$dev" 0 + check_mac_presence "$should_fail" "$dev" 1 + check_mac_presence "$should_fail" "$dev" 2 + check_mac_presence "$should_fail" "$dev" 3 + done + + log_test "$what" +} + +do_end_to_end_test() +{ + local mac=$1; shift + local what=$1; shift + local probe_dev=${1-$h3}; shift + local expect=${1-10}; shift + + local t0 + local t1 + local dd + + RET=0 + + # In mausezahn, use $dev MAC as the destination MAC. In the MAC sharing + # context, that will cause an FDB miss on VLAN 1 and prompt a second + # lookup in VLAN 0. 
+ + t0=$(tc_rule_stats_get "$probe_dev" 104 ingress) + + $MZ "$h1" -c 10 -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.18 -t udp "dp=4096,sp=2048" -q + sleep 1 + + t1=$(tc_rule_stats_get "$probe_dev" 104 ingress) + dd=$((t1 - t0)) + + ((dd == expect)) + check_err $? "Expected $expect packets on $probe_dev got $dd" + + log_test "$what" +} + +do_tests() +{ + local should_fail=$1; shift + local what=$1; shift + local swp1_mac + local br_mac + + swp1_mac=$(mac_get "$swp1") + br_mac=$(mac_get br) + + do_sharing_test "$should_fail" "$what" + do_end_to_end_test "$swp1_mac" "$what: end to end, $swp1 MAC" + do_end_to_end_test "$br_mac" "$what: end to end, br MAC" +} + +bridge_standard() +{ + local vlan_filtering=$1; shift + + if ((vlan_filtering)); then + echo 802.1q + else + echo 802.1d + fi +} + +nonexistent_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + # We expect flooding, so $h2 should get the traffic. + do_end_to_end_test "$xMAC" "$standard: Nonexistent FDB" "$h2" +} + +misleading_fdb_test() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + defer_scope_push + # Add an FDB entry on VLAN 0. The lookup on VLAN-aware bridge + # shouldn't pick this up even with fdb_local_vlan_0 enabled, so + # the traffic should be flooded. This all holds on + # vlan_filtering bridge, on non-vlan_filtering one the FDB entry + # is expected to be found as usual, no flooding takes place. + # + # Adding only on VLAN 0 is a bit tricky, because bridge is + # trying to be nice and interprets the request as if the FDB + # should be added on each VLAN. + + bridge fdb add "$mMAC" dev "$swp1" master + bridge fdb del "$mMAC" dev "$swp1" vlan 1 master + bridge fdb del "$mMAC" dev "$swp1" vlan 2 master + bridge fdb del "$mMAC" dev "$swp1" vlan 3 master + + local expect=$((vlan_filtering ? 
10 : 0)) + do_end_to_end_test "$mMAC" \ + "$standard: Lookup of non-local MAC on VLAN 0" \ + "$h2" "$expect" + defer_scope_pop +} + +change_mac() +{ + local dev=$1; shift + local mac=$1; shift + local cur_mac + + cur_mac=$(mac_get "$dev") + + log_info "Change $dev MAC $cur_mac -> $mac" + adf_ip_link_set_addr "$dev" "$mac" + defer log_info "Change $dev MAC back" +} + +do_test_no_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + adf_bridge_create vlan_filtering "$vlan_filtering" + setup_wait + + do_tests 0 "$standard, no FDB sharing" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 0 "$standard, no FDB sharing after MAC change" + + in_defer_scope check_fdb_local_vlan_0_support || return + + log_info "Set fdb_local_vlan_0=1" + ip link set dev br type bridge fdb_local_vlan_0 1 + + do_tests 1 "$standard, fdb sharing after toggle" +} + +do_test_sharing() +{ + local vlan_filtering=$1; shift + local standard + + standard=$(bridge_standard "$vlan_filtering") + + in_defer_scope check_fdb_local_vlan_0_support || return + + adf_bridge_create vlan_filtering "$vlan_filtering" fdb_local_vlan_0 1 + setup_wait + + do_tests 1 "$standard, FDB sharing" + + nonexistent_fdb_test "$vlan_filtering" + misleading_fdb_test "$vlan_filtering" + + change_mac "$swp1" "$pMAC" + change_mac br "$bMAC" + + do_tests 1 "$standard, FDB sharing after MAC change" + + log_info "Set fdb_local_vlan_0=0" + ip link set dev br type bridge fdb_local_vlan_0 0 + + do_tests 0 "$standard, No FDB sharing after toggle" +} + +test_d_no_sharing() +{ + do_test_no_sharing 0 +} + +test_d_sharing() +{ + do_test_sharing 0 +} + +test_q_no_sharing() +{ + do_test_no_sharing 1 +} + +test_q_sharing() +{ + do_test_sharing 1 +} + +adf_addr_set_bridge_create() +{ + adf_ip_link_add br up type bridge vlan_filtering 0 + adf_ip_link_set_addr br "$(mac_get br)" + adf_bridge_configure +} + +test_addr_set() +{ + adf_addr_set_bridge_create + setup_wait + 
+ do_end_to_end_test "$(mac_get br)" "NET_ADDR_SET: end to end, br MAC" +} + +trap cleanup EXIT + +setup_prepare +tests_run diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh index e6a3e04fd83f..d4e7dd659354 100755 --- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh +++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh @@ -1,10 +1,24 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \ - v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \ - v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \ - v3exc_timeout_test v3star_ex_auto_add_test" +ALL_TESTS=" + v2reportleave_test + v3include_test + v3inc_allow_test + v3inc_is_include_test + v3inc_is_exclude_test + v3inc_to_exclude_test + v3exc_allow_test + v3exc_is_include_test + v3exc_is_exclude_test + v3exc_to_exclude_test + v3inc_block_test + v3exc_block_test + v3exc_timeout_test + v3star_ex_auto_add_test + v2per_vlan_snooping_port_stp_test + v2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="239.10.10.10" @@ -554,6 +568,64 @@ v3star_ex_auto_add_test() v3cleanup $swp2 $TEST_GROUP } +v2per_vlan_snooping_stp_test() +{ + local is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_igmp_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq 
'.[]["multicast"]["igmp_queries"]["tx_v2"]') + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? "No IGMP queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +v2per_vlan_snooping_port_stp_test() +{ + v2per_vlan_snooping_stp_test 1 +} + +v2per_vlan_snooping_vlan_stp_test() +{ + v2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh index d9d587454d20..e86d77946585 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh @@ -28,6 +28,7 @@ ALL_TESTS=" cfg_test fwd_test ctrl_test + disable_test " NUM_NETIFS=4 @@ -64,7 +65,10 @@ h2_destroy() switch_create() { - ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \ + local vlan_filtering=$1; shift + + ip link add name br0 type bridge \ + vlan_filtering "$vlan_filtering" vlan_default_pvid 0 \ mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2 bridge vlan add vid 10 dev br0 self bridge vlan add vid 20 dev br0 self @@ -118,7 +122,7 @@ setup_prepare() h1_create h2_create - switch_create + switch_create 1 } cleanup() @@ -149,7 +153,7 @@ cfg_test_host_common() check_err $? "Failed to add $name host entry" bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null - check_fail $? 
"Managed to replace $name host entry" + check_err $? "Failed to replace $name host entry" bridge mdb del dev br0 port br0 grp $grp $state vid 10 bridge mdb get dev br0 grp $grp vid 10 &> /dev/null @@ -1357,6 +1361,98 @@ ctrl_test() ctrl_mldv2_is_in_test } +check_group() +{ + local group=$1; shift + local vid=$1; shift + local should_fail=$1; shift + local when=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + bridge mdb get dev br0 grp "$group" "${vidkws[@]}" 2>/dev/null | + grep -q "port $swp1" + check_err_fail "$should_fail" $? "$group seen $when snooping disable:" +} + +__disable_test() +{ + local vid=$1; shift + local what=$1; shift + local -a vidkws + + if ((vid)); then + vidkws=(vid "$vid") + fi + + RET=0 + + bridge mdb add dev br0 port "$swp1" grp ff0e::1 permanent \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::1 + bridge mdb add dev br0 port "$swp1" grp ff0e::2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp ff0e::3 \ + "${vidkws[@]}" filter_mode include source_list 2001:db8:1::2 + bridge mdb add dev br0 port "$swp1" grp ff0e::4 \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.1 permanent \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.1 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.2 permanent \ + "${vidkws[@]}" filter_mode exclude + + bridge mdb add dev br0 port "$swp1" grp 239.1.1.3 \ + "${vidkws[@]}" filter_mode include source_list 192.0.2.2 + bridge mdb add dev br0 port "$swp1" grp 239.1.1.4 \ + "${vidkws[@]}" filter_mode exclude + + check_group ff0e::1 "$vid" 0 "before" + check_group ff0e::2 "$vid" 0 "before" + check_group ff0e::3 "$vid" 0 "before" + check_group ff0e::4 "$vid" 0 "before" + + check_group 239.1.1.1 "$vid" 0 "before" + check_group 239.1.1.2 "$vid" 0 "before" + check_group 239.1.1.3 "$vid" 0 "before" + check_group 239.1.1.4 "$vid" 0 "before" + + ip link set dev br0 type bridge mcast_snooping 0 
+ + check_group ff0e::1 "$vid" 0 "after" + check_group ff0e::2 "$vid" 0 "after" + check_group ff0e::3 "$vid" 1 "after" + check_group ff0e::4 "$vid" 1 "after" + + check_group 239.1.1.1 "$vid" 0 "after" + check_group 239.1.1.2 "$vid" 0 "after" + check_group 239.1.1.3 "$vid" 1 "after" + check_group 239.1.1.4 "$vid" 1 "after" + + log_test "$what: Flush after disable" + + ip link set dev br0 type bridge mcast_snooping 1 + sleep 10 +} + +disable_test() +{ + __disable_test 10 802.1q + + switch_destroy + switch_create 0 + setup_wait + + __disable_test 0 802.1d + + switch_destroy + switch_create 1 + setup_wait +} + if ! bridge mdb help 2>&1 | grep -q "flush"; then echo "SKIP: iproute2 too old, missing bridge mdb flush support" exit $ksft_skip diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh index f84ab2e65754..4cacef5a813a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_mld.sh +++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh @@ -1,10 +1,23 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \ - mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \ - mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \ - mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test" +ALL_TESTS=" + mldv2include_test + mldv2inc_allow_test + mldv2inc_is_include_test + mldv2inc_is_exclude_test + mldv2inc_to_exclude_test + mldv2exc_allow_test + mldv2exc_is_include_test + mldv2exc_is_exclude_test + mldv2exc_to_exclude_test + mldv2inc_block_test + mldv2exc_block_test + mldv2exc_timeout_test + mldv2star_ex_auto_add_test + mldv2per_vlan_snooping_port_stp_test + mldv2per_vlan_snooping_vlan_stp_test +" NUM_NETIFS=4 CHECK_TC="yes" TEST_GROUP="ff02::cc" @@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test() mldv2cleanup $swp2 } +mldv2per_vlan_snooping_stp_test() +{ + local 
is_port=$1 + + local msg="port" + [[ $is_port -ne 1 ]] && msg="vlan" + + ip link set br0 up type bridge vlan_filtering 1 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_vlan_snooping 1 \ + mcast_querier 1 \ + mcast_stats_enabled 1 + bridge vlan global set vid 1 dev br0 \ + mcast_mld_version 2 \ + mcast_snooping 1 \ + mcast_querier 1 \ + mcast_query_interval 100 \ + mcast_startup_query_count 0 + + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4 + sleep 5 + local tx_s=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3 + [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3 + sleep 5 + local tx_e=$(ip -j -p stats show dev $swp1 \ + group xstats_slave subgroup bridge suite mcast \ + | jq '.[]["multicast"]["mld_queries"]["tx_v2"]') + + RET=0 + local tx=$(expr $tx_e - $tx_s) + test $tx -gt 0 + check_err $? 
"No MLD queries after STP state becomes forwarding" + log_test "per vlan snooping with $msg stp state change" + + # restore settings + bridge vlan global set vid 1 dev br0 \ + mcast_querier 0 \ + mcast_query_interval 12500 \ + mcast_startup_query_count 2 \ + mcast_mld_version 1 + ip link set br0 up type bridge vlan_filtering 0 \ + mcast_vlan_snooping 0 \ + mcast_stats_enabled 0 +} + +mldv2per_vlan_snooping_port_stp_test() +{ + mldv2per_vlan_snooping_stp_test 1 +} + +mldv2per_vlan_snooping_vlan_stp_test() +{ + mldv2per_vlan_snooping_stp_test 0 +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh index 64bd00fe9a4f..e59fba366a0a 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged" NUM_NETIFS=4 CHECK_TC="yes" source lib.sh @@ -142,6 +142,152 @@ extern_learn() bridge fdb del de:ad:be:ef:13:37 dev $swp1 master vlan 1 &> /dev/null } +other_tpid() +{ + local mac=de:ad:be:ef:13:37 + + # Test that packets with TPID 802.1ad VID 3 + TPID 802.1Q VID 5 are + # classified as untagged by a bridge with vlan_protocol 802.1Q, and + # are processed in the PVID of the ingress port (here 1). Not VID 3, + # and not VID 5. 
+ RET=0 + + tc qdisc add dev $h2 clsact + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + ip link set $h2 promisc on + ethtool -K $h2 rx-vlan-filter off rx-vlan-stag-filter off + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + # Match on 'self' addresses as well, for those drivers which + # do not push their learned addresses to the bridge software + # database + bridge -j fdb show $swp1 | \ + jq -e ".[] | select(.mac == \"$(mac_get $h1)\") | select(.vlan == 1)" &> /dev/null + check_err $? "FDB entry was not learned when it should" + + log_test "FDB entry in PVID for VLAN-tagged with other TPID" + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was not forwarded when it should" + log_test "Reception of VLAN with other TPID as untagged" + + bridge vlan del dev $swp1 vid 1 + + $MZ -q $h1 -c 1 -b $mac -a own "88:a8 00:03 81:00 00:05 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + RET=0 + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err $? "Packet was forwarded when should not" + log_test "Reception of VLAN with other TPID as untagged (no PVID)" + + bridge vlan add dev $swp1 vid 1 pvid untagged + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +8021p_do() +{ + local should_fail=$1; shift + local mac=de:ad:be:ef:13:37 + + tc filter add dev $h2 ingress protocol all pref 1 handle 101 \ + flower dst_mac $mac action drop + + $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa" + sleep 1 + + tc -j -s filter show dev $h2 ingress \ + | jq -e ".[] | select(.options.handle == 101) \ + | select(.options.actions[0].stats.packets == 1)" &> /dev/null + check_err_fail $should_fail $? 
"802.1p-tagged reception" + + tc filter del dev $h2 ingress pref 1 +} + +8021p() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with the default_pvid, 1, packets tagged with VID 0 are + # accepted. + 8021p_do 0 + + # Test that packets tagged with VID 0 are still accepted after changing + # the default_pvid. + ip link set br0 type bridge vlan_default_pvid 10 + 8021p_do 0 + + log_test "Reception of 802.1p-tagged traffic" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + +send_untagged_and_8021p() +{ + ping_do $h1 192.0.2.2 + check_fail $? + + 8021p_do 1 +} + +drop_untagged() +{ + RET=0 + + tc qdisc add dev $h2 clsact + ip link set $h2 promisc on + + # Test that with no PVID, untagged and 802.1p-tagged traffic is + # dropped. + ip link set br0 type bridge vlan_default_pvid 1 + + # First we reconfigure the default_pvid, 1, as a non-PVID VLAN. + bridge vlan add dev $swp1 vid 1 untagged + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Next we try to delete VID 1 altogether + bridge vlan del dev $swp1 vid 1 + send_untagged_and_8021p + bridge vlan add dev $swp1 vid 1 pvid untagged + + # Set up the bridge without a default_pvid, then check that the 8021q + # module, when the bridge port goes down and then up again, does not + # accidentally re-enable untagged packet reception. + ip link set br0 type bridge vlan_default_pvid 0 + ip link set $swp1 down + ip link set $swp1 up + setup_wait + send_untagged_and_8021p + + # Remove swp1 as a bridge port and let it rejoin the bridge while it + # has no default_pvid. 
+ ip link set $swp1 nomaster + ip link set $swp1 master br0 + send_untagged_and_8021p + + # Restore settings + ip link set br0 type bridge vlan_default_pvid 1 + + log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID" + + ip link set $h2 promisc off + tc qdisc del dev $h2 clsact +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh index 1c8a26046589..2b5700b61ffa 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh @@ -1,7 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding" +ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change" NUM_NETIFS=4 source lib.sh @@ -77,12 +77,16 @@ cleanup() ping_ipv4() { - ping_test $h1 192.0.2.2 + local msg=$1 + + ping_test $h1 192.0.2.2 "$msg" } ping_ipv6() { - ping6_test $h1 2001:db8:1::2 + local msg=$1 + + ping6_test $h1 2001:db8:1::2 "$msg" } learning() @@ -95,6 +99,21 @@ flooding() flood_test $swp2 $h1 $h2 } +pvid_change() +{ + # Test that the changing of the VLAN-aware PVID does not affect + # VLAN-unaware forwarding + bridge vlan add vid 3 dev $swp1 pvid untagged + + ping_ipv4 " with bridge port $swp1 PVID changed" + ping_ipv6 " with bridge port $swp1 PVID changed" + + bridge vlan del vid 3 dev $swp1 + + ping_ipv4 " with bridge port $swp1 PVID deleted" + ping_ipv6 " with bridge port $swp1 PVID deleted" +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 8d7a1a004b7c..ce64518aaa11 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -1,23 +1,23 @@ +CONFIG_BPF_SYSCALL=y CONFIG_BRIDGE=m -CONFIG_VLAN_8021Q=m +CONFIG_BRIDGE_IGMP_SNOOPING=y CONFIG_BRIDGE_VLAN_FILTERING=y 
-CONFIG_NET_L3_MASTER_DEV=y -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_NET_VRF=m -CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y CONFIG_IPV6=y CONFIG_IPV6_GRE=m CONFIG_IPV6_MROUTE=y CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6_PIMSM_V2=y -CONFIG_IP_MROUTE=y -CONFIG_IP_MROUTE_MULTIPLE_TABLES=y -CONFIG_IP_PIMSM_V1=y -CONFIG_IP_PIMSM_V2=y CONFIG_MACVLAN=m +CONFIG_NAMESPACES=y CONFIG_NET_ACT_CT=m +CONFIG_NET_ACT_GACT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m CONFIG_NET_ACT_PEDIT=m @@ -26,29 +26,30 @@ CONFIG_NET_ACT_SAMPLE=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m +CONFIG_NET_CLS_BASIC=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m -CONFIG_NET_CLS_BASIC=m CONFIG_NET_EMATCH=y CONFIG_NET_EMATCH_META=m +CONFIG_NETFILTER=y CONFIG_NET_IPGRE=m CONFIG_NET_IPGRE_DEMUX=m CONFIG_NET_IPIP=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m -CONFIG_NET_ACT_GACT=m CONFIG_NET_SCH_PRIO=m CONFIG_NET_SCH_RED=m CONFIG_NET_SCH_TBF=m CONFIG_NET_TC_SKB_EXT=y CONFIG_NET_TEAM=y CONFIG_NET_TEAM_MODE_LOADBALANCE=y -CONFIG_NETFILTER=y +CONFIG_NET_VRF=m CONFIG_NF_CONNTRACK=m CONFIG_NF_FLOW_TABLE=m CONFIG_NF_TABLES=m CONFIG_VETH=m -CONFIG_NAMESPACES=y -CONFIG_NET_NS=y +CONFIG_VLAN_8021Q=m CONFIG_VXLAN=m CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh index 1783c10215e5..5dbfab0e23e3 100755 --- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh @@ -224,10 +224,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. 
- for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 -F 0 -c 1 -q 2001:db8:4::2 >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index f1de525cfa55..18afa89ebbcc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -122,6 +122,8 @@ devlink_reload() still_pending=$(devlink resource show "$DEVLINK_DEV" | \ grep -c "size_new") check_err $still_pending "Failed reload - There are still unset sizes" + + udevadm settle } declare -A DEVLINK_ORIG @@ -499,7 +501,7 @@ devlink_trap_drop_cleanup() local pref=$1; shift local handle=$1; shift - kill $mz_pid && wait $mz_pid &> /dev/null + kill_process $mz_pid tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower } diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh index 9788bd0f6e8b..b4f17a5bbc61 100755 --- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh @@ -319,10 +319,10 @@ send_dst_ipv6() send_flowlabel() { # Generate 16384 echo requests, each with a random flow label. 
- for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh index 49fa94b53a1c..25036e38043c 100755 --- a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh +++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh @@ -95,7 +95,7 @@ ipv6_in_too_big_err() # Send too big packets ip vrf exec $vrf_name \ - $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -s 1300 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors) test "$((t1 - t0))" -ne 0 @@ -131,7 +131,7 @@ ipv6_in_addr_err() # Disable forwarding temporary while sending the packet sysctl -qw net.ipv6.conf.all.forwarding=0 ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null sysctl -qw net.ipv6.conf.all.forwarding=1 local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors) @@ -150,7 +150,7 @@ ipv6_in_discard() # Add a policy to discard ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block ip vrf exec $vrf_name \ - $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null + $PING6 -c 1 -w $PING_TIMEOUT 2001:1:2::2 &> /dev/null ip xfrm policy del dst 2001:1:2::2/128 dir fwd local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards) diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh index 2ab9eaaa5532..b24acfa52a3a 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh @@ -321,10 +321,10 @@ send_dst_ipv6() send_flowlabel() { # 
Generate 16384 echo requests, each with a random flow label. - for _ in $(seq 1 16384); do - ip vrf exec v$h1 \ - $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1 - done + ip vrf exec v$h1 sh -c \ + "for _ in {1..16384}; do \ + $PING6 -F 0 -c 1 -q 2001:db8:2::2 >/dev/null 2>&1; \ + done" } send_src_udp6() diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh index 96c97064f2d3..becc7c3fc809 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh index ff9fb0db9bd1..e5335116a2fd 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh 
b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh index 12c138785242..7e0cbfdefab0 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_flat gre_mtu_change + gre_flat_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_flat_remote_change() +{ + flat_remote_change + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (new remote)" + + flat_remote_restore + + test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh index 83b55c30a5c3..e0844495f3d1 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh index 256607916d92..741bc9c928eb 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 
@@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh index ad1bcd6334a8..ad9eab4b1367 100755 --- a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh @@ -8,6 +8,7 @@ ALL_TESTS=" gre_hier gre_mtu_change + gre_hier_remote_change " NUM_NETIFS=6 @@ -44,6 +45,19 @@ gre_mtu_change() test_mtu_change gre } +gre_hier_remote_change() +{ + hier_remote_change + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (new remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (new remote)" + + hier_remote_restore + + test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey (old remote)" + test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey (old remote)" +} + cleanup() { pre_cleanup diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh index 24f4ab328bd2..2d91281dc5b7 100644 --- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh @@ -436,3 +436,83 @@ test_mtu_change() check_err $? 
log_test "ping GRE IPv6, packet size 1800 after MTU change" } + +topo_flat_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + ip link set dev g1a type ip6gre local $new1 remote $new2 + __addr_add_del g1a add "$new1/128" + __addr_add_del g1a del "$old1/128" + ip -6 route add $new2/128 via 2001:db8:10::2 + ip -6 route del $old2/128 + + ip link set dev g2a type ip6gre local $new2 remote $new1 + __addr_add_del g2a add "$new2/128" + __addr_add_del g2a del "$old2/128" + ip -6 route add vrf v$ol2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ol2 $old1/128 +} + +flat_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +flat_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_flat_remote_change $old1 $new1 $old2 $new2 +} + +topo_hier_remote_change() +{ + local old1=$1; shift + local new1=$1; shift + local old2=$1; shift + local new2=$1; shift + + __addr_add_del dummy1 del "$old1/64" + __addr_add_del dummy1 add "$new1/64" + ip link set dev g1a type ip6gre local $new1 remote $new2 + ip -6 route add vrf v$ul1 $new2/128 via 2001:db8:10::2 + ip -6 route del vrf v$ul1 $old2/128 + + __addr_add_del dummy2 del "$old2/64" + __addr_add_del dummy2 add "$new2/64" + ip link set dev g2a type ip6gre local $new2 remote $new1 + ip -6 route add vrf v$ul2 $new1/128 via 2001:db8:10::1 + ip -6 route del vrf v$ul2 $old1/128 +} + +hier_remote_change() +{ + local old1=2001:db8:3::1 + local new1=2001:db8:3::10 + local old2=2001:db8:3::2 + local new2=2001:db8:3::20 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} + +hier_remote_restore() +{ + local old1=2001:db8:3::10 + local new1=2001:db8:3::1 + local old2=2001:db8:3::20 + local new2=2001:db8:3::2 + + topo_hier_remote_change $old1 $new1 $old2 $new2 +} diff 
--git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index ff96bb7535ff..a9034f0bb58b 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -37,6 +37,7 @@ declare -A NETIFS=( : "${TEAMD:=teamd}" : "${MCD:=smcrouted}" : "${MC_CLI:=smcroutectl}" +: "${MCD_TABLE_NAME:=selftests}" # Constants for netdevice bring-up: # Default time in seconds to wait for an interface to come up before giving up @@ -48,7 +49,6 @@ declare -A NETIFS=( : "${WAIT_TIME:=5}" # Whether to pause on, respectively, after a failure and before cleanup. -: "${PAUSE_ON_FAIL:=no}" : "${PAUSE_ON_CLEANUP:=no}" # Whether to create virtual interfaces, and what netdevice type they should be. @@ -69,6 +69,7 @@ declare -A NETIFS=( : "${REQUIRE_JQ:=yes}" : "${REQUIRE_MZ:=yes}" : "${REQUIRE_MTOOLS:=no}" +: "${REQUIRE_TEAMD:=no}" # Whether to override MAC addresses on interfaces participating in the test. : "${STABLE_MAC_ADDRS:=no}" @@ -141,6 +142,20 @@ check_tc_version() fi } +check_tc_erspan_support() +{ + local dev=$1; shift + + tc filter add dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing erspan support" + return $ksft_skip + fi + tc filter del dev $dev ingress pref 1 handle 1 flower \ + erspan_opts 1:0:0:0 &> /dev/null +} + # Old versions of tc don't understand "mpls_uc" check_tc_mpls_support() { @@ -291,16 +306,6 @@ if [[ "$CHECK_TC" = "yes" ]]; then check_tc_version fi -require_command() -{ - local cmd=$1; shift - - if [[ ! 
-x "$(command -v "$cmd")" ]]; then - echo "SKIP: $cmd not installed" - exit $ksft_skip - fi -} - # IPv6 support was added in v3.0 check_mtools_version() { @@ -322,6 +327,9 @@ fi if [[ "$REQUIRE_MZ" = "yes" ]]; then require_command $MZ fi +if [[ "$REQUIRE_TEAMD" = "yes" ]]; then + require_command $TEAMD +fi if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then # https://github.com/troglobit/mtools require_command msend @@ -446,179 +454,6 @@ done ############################################################################## # Helpers -# Exit status to return at the end. Set in case one of the tests fails. -EXIT_STATUS=0 -# Per-test return value. Clear at the beginning of each test. -RET=0 - -ret_set_ksft_status() -{ - local ksft_status=$1; shift - local msg=$1; shift - - RET=$(ksft_status_merge $RET $ksft_status) - if (( $? )); then - retmsg=$msg - fi -} - -# Whether FAILs should be interpreted as XFAILs. Internal. -FAIL_TO_XFAIL= - -check_err() -{ - local err=$1 - local msg=$2 - - if ((err)); then - if [[ $FAIL_TO_XFAIL = yes ]]; then - ret_set_ksft_status $ksft_xfail "$msg" - else - ret_set_ksft_status $ksft_fail "$msg" - fi - fi -} - -check_fail() -{ - local err=$1 - local msg=$2 - - check_err $((!err)) "$msg" -} - -check_err_fail() -{ - local should_fail=$1; shift - local err=$1; shift - local what=$1; shift - - if ((should_fail)); then - check_fail $err "$what succeeded, but should have failed" - else - check_err $err "$what failed" - fi -} - -xfail_on_slow() -{ - if [[ $KSFT_MACHINE_SLOW = yes ]]; then - FAIL_TO_XFAIL=yes "$@" - else - "$@" - fi -} - -xfail_on_veth() -{ - local dev=$1; shift - local kind - - kind=$(ip -j -d link show dev $dev | - jq -r '.[].linkinfo.info_kind') - if [[ $kind = veth ]]; then - FAIL_TO_XFAIL=yes "$@" - else - "$@" - fi -} - -log_test_result() -{ - local test_name=$1; shift - local opt_str=$1; shift - local result=$1; shift - local retmsg=$1; shift - - printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" - if [[ $retmsg ]]; then - 
printf "\t%s\n" "$retmsg" - fi -} - -pause_on_fail() -{ - if [[ $PAUSE_ON_FAIL == yes ]]; then - echo "Hit enter to continue, 'q' to quit" - read a - [[ $a == q ]] && exit 1 - fi -} - -handle_test_result_pass() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" " OK " -} - -handle_test_result_fail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" FAIL "$retmsg" - pause_on_fail -} - -handle_test_result_xfail() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" - pause_on_fail -} - -handle_test_result_skip() -{ - local test_name=$1; shift - local opt_str=$1; shift - - log_test_result "$test_name" "$opt_str" SKIP "$retmsg" -} - -log_test() -{ - local test_name=$1 - local opt_str=$2 - - if [[ $# -eq 2 ]]; then - opt_str="($opt_str)" - fi - - if ((RET == ksft_pass)); then - handle_test_result_pass "$test_name" "$opt_str" - elif ((RET == ksft_xfail)); then - handle_test_result_xfail "$test_name" "$opt_str" - elif ((RET == ksft_skip)); then - handle_test_result_skip "$test_name" "$opt_str" - else - handle_test_result_fail "$test_name" "$opt_str" - fi - - EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) - return $RET -} - -log_test_skip() -{ - RET=$ksft_skip retmsg= log_test "$@" -} - -log_test_xfail() -{ - RET=$ksft_xfail retmsg= log_test "$@" -} - -log_info() -{ - local msg=$1 - - echo "INFO: $msg" -} - not() { "$@" @@ -705,9 +540,9 @@ setup_wait_dev_with_timeout() return 1 } -setup_wait() +setup_wait_n() { - local num_netifs=${1:-$NUM_NETIFS} + local num_netifs=$1; shift local i for ((i = 1; i <= num_netifs; ++i)); do @@ -718,6 +553,11 @@ setup_wait() sleep $WAIT_TIME } +setup_wait() +{ + setup_wait_n "$NUM_NETIFS" +} + wait_for_dev() { local dev=$1; shift @@ -731,30 +571,6 @@ wait_for_dev() fi } -cmd_jq() -{ - local cmd=$1 - local jq_exp=$2 - local jq_opts=$3 - local ret - local output - - 
output="$($cmd)" - # it the command fails, return error right away - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - output=$(echo $output | jq -r $jq_opts "$jq_exp") - ret=$? - if [[ $ret -ne 0 ]]; then - return $ret - fi - echo $output - # return success only in case of non-empty output - [ ! -z "$output" ] -} - pre_cleanup() { if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then @@ -783,6 +599,12 @@ vrf_cleanup() ip -4 rule del pref 32765 } +adf_vrf_prepare() +{ + vrf_prepare + defer vrf_cleanup +} + __last_tb_id=0 declare -A __TB_IDS @@ -895,6 +717,12 @@ simple_if_fini() vrf_destroy $vrf_name } +adf_simple_if_init() +{ + simple_if_init "$@" + defer simple_if_fini "$@" +} + tunnel_create() { local name=$1; shift @@ -1106,11 +934,37 @@ packets_rate() echo $(((t1 - t0) / interval)) } -mac_get() +ether_addr_to_u64() { - local if_name=$1 + local addr="$1" + local order="$((1 << 40))" + local val=0 + local byte + + addr="${addr//:/ }" + + for byte in $addr; do + byte="0x$byte" + val=$((val + order * byte)) + order=$((order >> 8)) + done - ip -j link show dev $if_name | jq -r '.[]["address"]' + printf "0x%x" $val +} + +u64_to_ether_addr() +{ + local val=$1 + local byte + local i + + for ((i = 40; i >= 0; i -= 8)); do + byte=$(((val & (0xff << i)) >> i)) + printf "%02x" $byte + if [ $i -ne 0 ]; then + printf ":" + fi + done } ipv6_lladdr_get() @@ -1169,6 +1023,12 @@ forwarding_restore() sysctl_restore net.ipv4.conf.all.forwarding } +adf_forwarding_enable() +{ + forwarding_enable + defer forwarding_restore +} + declare -A MTU_ORIG mtu_set() { @@ -1353,13 +1213,10 @@ matchall_sink_create() action drop } -tests_run() +cleanup() { - local current_test - - for current_test in ${TESTS:-$ALL_TESTS}; do - $current_test - done + pre_cleanup + defer_scopes_cleanup } multipath_eval() @@ -1428,8 +1285,8 @@ ping_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING $args -c $PING_COUNT -i 
0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping_test() @@ -1459,8 +1316,8 @@ ping6_do() vrf_name=$(master_name_get $if_name) ip vrf exec $vrf_name \ - $PING6 $args $dip -c $PING_COUNT -i 0.1 \ - -w $PING_TIMEOUT &> /dev/null + $PING6 $args -c $PING_COUNT -i 0.1 \ + -w $PING_TIMEOUT $dip &> /dev/null } ping6_test() @@ -1716,8 +1573,9 @@ start_tcp_traffic() stop_traffic() { - # Suppress noise from killing mausezahn. - { kill %% && wait %%; } 2>/dev/null + local pid=${1-%%}; shift + + kill_process "$pid" } declare -A cappid @@ -1913,6 +1771,51 @@ mc_send() msend -g $groups -I $if_name -c 1 > /dev/null 2>&1 } +adf_mcd_start() +{ + local ifs=("$@") + + local table_name="$MCD_TABLE_NAME" + local smcroutedir + local pid + local if + local i + + check_command "$MCD" || return 1 + check_command "$MC_CLI" || return 1 + + smcroutedir=$(mktemp -d) + defer rm -rf "$smcroutedir" + + for ((i = 1; i <= NUM_NETIFS; ++i)); do + echo "phyint ${NETIFS[p$i]} enable" >> \ + "$smcroutedir/$table_name.conf" + done + + for if in "${ifs[@]}"; do + if ! ip_link_has_flag "$if" MULTICAST; then + ip link set dev "$if" multicast on + defer ip link set dev "$if" multicast off + fi + + echo "phyint $if enable" >> \ + "$smcroutedir/$table_name.conf" + done + + "$MCD" -N -I "$table_name" -f "$smcroutedir/$table_name.conf" \ + -P "$smcroutedir/$table_name.pid" + busywait "$BUSYWAIT_TIMEOUT" test -e "$smcroutedir/$table_name.pid" + pid=$(cat "$smcroutedir/$table_name.pid") + defer kill_process "$pid" +} + +mc_cli() +{ + local table_name="$MCD_TABLE_NAME" + + "$MC_CLI" -I "$table_name" "$@" +} + start_ip_monitor() { local mtype=$1; shift @@ -2229,3 +2132,22 @@ absval() echo $((v > 0 ? 
v : -v)) } + +has_unicast_flt() +{ + local dev=$1; shift + local mac_addr=$(mac_get $dev) + local tmp=$(ether_addr_to_u64 $mac_addr) + local promisc + + ip link set $dev up + ip link add link $dev name macvlan-tmp type macvlan mode private + ip link set macvlan-tmp address $(u64_to_ether_addr $((tmp + 1))) + ip link set macvlan-tmp up + + promisc=$(ip -j -d link show dev $dev | jq -r '.[].promiscuity') + + ip link del macvlan-tmp + + [[ $promisc == 1 ]] && echo "no" || echo "yes" +} diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh index ff2accccaf4d..b4eda6c6199e 100755 --- a/tools/testing/selftests/net/forwarding/lib_sh_test.sh +++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh @@ -30,6 +30,11 @@ tfail() do_test "tfail" false } +tfail2() +{ + do_test "tfail2" false +} + txfail() { FAIL_TO_XFAIL=yes do_test "txfail" false @@ -132,6 +137,8 @@ test_ret() ret_subtest $ksft_fail "tfail" txfail tfail ret_subtest $ksft_xfail "txfail" txfail txfail + + ret_subtest $ksft_fail "tfail2" tfail2 tfail } exit_status_tests_run() diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index 4b364cdf3ef0..892895659c7e 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -1,11 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -ALL_TESTS="standalone bridge" +ALL_TESTS="standalone vlan_unaware_bridge vlan_aware_bridge test_vlan \ + vlan_over_vlan_unaware_bridged_port vlan_over_vlan_aware_bridged_port \ + vlan_over_vlan_unaware_bridge vlan_over_vlan_aware_bridge" NUM_NETIFS=2 PING_COUNT=1 REQUIRE_MTOOLS=yes -REQUIRE_MZ=no source lib.sh @@ -37,9 +38,68 @@ UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05" UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06" UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07" -NON_IP_MC="01:02:03:04:05:06" -NON_IP_PKT="00:04 
48:45:4c:4f" -BC="ff:ff:ff:ff:ff:ff" +PTP_1588_L2_SYNC=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00" +PTP_1588_L2_FOLLOW_UP=" \ +01:1b:19:00:00:00 00:00:de:ad:be:ef 88:f7 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c5 f1 17 97 ed f0" +PTP_1588_L2_PDELAY_REQ=" \ +01:80:c2:00:00:0e 00:00:de:ad:be:ef 88:f7 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 06 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00" +PTP_1588_IPV4_SYNC=" \ +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9a 40 00 01 11 cb 88 c0 00 02 01 e0 00 \ +01 81 01 3f 01 3f 00 34 a3 c8 00 02 00 2c 00 00 \ +02 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" +PTP_1588_IPV4_FOLLOW_UP=" +01:00:5e:00:01:81 00:00:de:ad:be:ef 08:00 45 00 \ +00 48 0a 9b 40 00 01 11 cb 87 c0 00 02 01 e0 00 \ +01 81 01 40 01 40 00 34 a3 c8 08 02 00 2c 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 00 02 00 00 00 66 83 \ +c6 0f 1d 9a 61 87" +PTP_1588_IPV4_PDELAY_REQ=" \ +01:00:5e:00:00:6b 00:00:de:ad:be:ef 08:00 45 00 \ +00 52 35 a9 40 00 01 11 a1 85 c0 00 02 01 e0 00 \ +00 6b 01 3f 01 3f 00 3e a2 bc 02 02 00 36 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 3e 37 \ +63 ff fe cf 17 0e 00 01 00 01 05 7f 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00" +PTP_1588_IPV6_SYNC=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 06 \ +7c 2f 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 3f 01 3f 00 36 2e 92 00 02 \ +00 2c 00 00 02 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 00 00 \ +00 00 00 00 00 00 00 00 00 00 
00 00" +PTP_1588_IPV6_FOLLOW_UP=" \ +33:33:00:00:01:81 00:00:de:ad:be:ef 86:dd 60 0a \ +00 bc 00 36 11 01 20 01 0d b8 00 01 00 00 00 00 \ +00 00 00 00 00 01 ff 0e 00 00 00 00 00 00 00 00 \ +00 00 00 00 01 81 01 40 01 40 00 36 2e 92 08 02 \ +00 2c 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 00 02 00 \ +00 00 66 83 c6 2a 32 09 bd 74 00 00" +PTP_1588_IPV6_PDELAY_REQ=" \ +33:33:00:00:00:6b 00:00:de:ad:be:ef 86:dd 60 0c \ +5c fd 00 40 11 01 fe 80 00 00 00 00 00 00 3c 37 \ +63 ff fe cf 17 0e ff 02 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 6b 01 3f 01 3f 00 40 b4 54 02 02 \ +00 36 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 3e 37 63 ff fe cf 17 0e 00 01 00 01 05 7f \ +00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 \ +00 00 00 00 00 00" # Disable promisc to ensure we don't receive unknown MAC DA packets export TCPDUMP_EXTRA_FLAGS="-pl" @@ -47,13 +107,15 @@ export TCPDUMP_EXTRA_FLAGS="-pl" h1=${NETIFS[p1]} h2=${NETIFS[p2]} -send_non_ip() +send_raw() { - local if_name=$1 - local smac=$2 - local dmac=$3 + local if_name=$1; shift + local pkt="$1"; shift + local smac=$(mac_get $if_name) + + pkt="${pkt/00:00:de:ad:be:ef/$smac}" - $MZ -q $if_name "$dmac $smac $NON_IP_PKT" + $MZ -q $if_name "$pkt" } send_uc_ipv4() @@ -68,10 +130,11 @@ send_uc_ipv4() check_rcv() { - local if_name=$1 - local type=$2 - local pattern=$3 - local should_receive=$4 + local if_name=$1; shift + local type=$1; shift + local pattern=$1; shift + local should_receive=$1; shift + local test_name="$1"; shift local should_fail= [ $should_receive = true ] && should_fail=0 || should_fail=1 @@ -81,7 +144,7 @@ check_rcv() check_err_fail "$should_fail" "$?" 
"reception" - log_test "$if_name: $type" + log_test "$test_name: $type" } mc_route_prepare() @@ -104,44 +167,80 @@ mc_route_destroy() run_test() { - local rcv_if_name=$1 - local smac=$(mac_get $h1) + local send_if_name=$1; shift + local rcv_if_name=$1; shift + local skip_ptp=$1; shift + local no_unicast_flt=$1; shift + local test_name="$1"; shift + local smac=$(mac_get $send_if_name) local rcv_dmac=$(mac_get $rcv_if_name) + local should_receive + + setup_wait tcpdump_start $rcv_if_name - mc_route_prepare $h1 + mc_route_prepare $send_if_name mc_route_prepare $rcv_if_name - send_uc_ipv4 $h1 $rcv_dmac - send_uc_ipv4 $h1 $MACVLAN_ADDR - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1 + send_uc_ipv4 $send_if_name $rcv_dmac + send_uc_ipv4 $send_if_name $MACVLAN_ADDR + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR1 ip link set dev $rcv_if_name promisc on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR2 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR2 ip link set dev $rcv_if_name promisc off mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR - mc_send $h1 $JOINED_IPV4_MC_ADDR + mc_send $send_if_name $JOINED_IPV4_MC_ADDR mc_leave mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR - mc_send $h1 $JOINED_IPV6_MC_ADDR + mc_send $send_if_name $JOINED_IPV6_MC_ADDR mc_leave - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR1 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR1 ip link set dev $rcv_if_name allmulticast on - send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3 - mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3 - mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3 + send_uc_ipv4 $send_if_name $UNKNOWN_UC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV4_MC_ADDR3 + mc_send $send_if_name $UNKNOWN_IPV6_MC_ADDR3 ip link set dev $rcv_if_name allmulticast off mc_route_destroy $rcv_if_name - mc_route_destroy $h1 + mc_route_destroy $send_if_name + 
+ if [ $skip_ptp = false ]; then + ip maddress add 01:1b:19:00:00:00 dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_SYNC" + send_raw $send_if_name "$PTP_1588_L2_FOLLOW_UP" + ip maddress del 01:1b:19:00:00:00 dev $rcv_if_name + + ip maddress add 01:80:c2:00:00:0e dev $rcv_if_name + send_raw $send_if_name "$PTP_1588_L2_PDELAY_REQ" + ip maddress del 01:80:c2:00:00:0e dev $rcv_if_name + + mc_join $rcv_if_name 224.0.1.129 + send_raw $send_if_name "$PTP_1588_IPV4_SYNC" + send_raw $send_if_name "$PTP_1588_IPV4_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name 224.0.0.107 + send_raw $send_if_name "$PTP_1588_IPV4_PDELAY_REQ" + mc_leave + + mc_join $rcv_if_name ff0e::181 + send_raw $send_if_name "$PTP_1588_IPV6_SYNC" + send_raw $send_if_name "$PTP_1588_IPV6_FOLLOW_UP" + mc_leave + + mc_join $rcv_if_name ff02::6b + send_raw $send_if_name "$PTP_1588_IPV6_PDELAY_REQ" + mc_leave + fi sleep 1 @@ -149,61 +248,99 @@ run_test() check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ - "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \ + "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ - check_rcv $rcv_if_name \ - "Unicast IPv4 to unknown MAC address, allmulti" \ - "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ - false + [ $no_unicast_flt = true ] && should_receive=true || should_receive=false + check_rcv 
$rcv_if_name \ + "Unicast IPv4 to unknown MAC address, allmulti" \ + "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \ + $should_receive "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to joined group" \ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name \ "Multicast IPv4 to unknown group" \ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to joined group" \ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" - xfail_on_veth $h1 \ + xfail \ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \ - false + false "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \ - true + true "$test_name" + + if [ $skip_ptp = false ]; then + check_rcv $rcv_if_name "1588v2 over L2 transport, Sync" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Follow-Up" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over L2 transport, Peer Delay Request" \ + "ethertype PTP (0x88f7).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Sync" \ + "ethertype IPv4 
(0x0800).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Follow-Up" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv4, Peer Delay Request" \ + "ethertype IPv4 (0x0800).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Sync" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : sync msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Follow-Up" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : follow up msg" \ + true "$test_name" + + check_rcv $rcv_if_name "1588v2 over IPv6, Peer Delay Request" \ + "ethertype IPv6 (0x86dd).* PTPv2.* msg type : peer delay req msg" \ + true "$test_name" + fi tcpdump_cleanup $rcv_if_name } @@ -228,62 +365,217 @@ h2_destroy() simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64 } +h1_vlan_create() +{ + simple_if_init $h1 + vlan_create $h1 100 v$h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_vlan_destroy() +{ + vlan_destroy $h1 100 + simple_if_fini $h1 +} + +h2_vlan_create() +{ + simple_if_init $h2 + vlan_create $h2 100 v$h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_vlan_destroy() +{ + vlan_destroy $h2 100 + simple_if_fini $h2 +} + bridge_create() { - ip link add br0 type bridge + local vlan_filtering=$1 + + ip link add br0 type bridge vlan_filtering $vlan_filtering ip link set br0 address $BRIDGE_ADDR ip link set br0 up ip link set $h2 master br0 ip link set $h2 up - - simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 } bridge_destroy() { - simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 - ip link del br0 } -standalone() +macvlan_create() { - h1_create - h2_create + local lower=$1 - ip link add link $h2 name macvlan0 type macvlan mode private + ip link add link $lower name macvlan0 type macvlan mode private ip link set macvlan0 address $MACVLAN_ADDR ip link set macvlan0 up +} - run_test $h2 - +macvlan_destroy() +{ ip link del macvlan0 +} + +standalone() +{ + local 
no_unicast_flt=true + local skip_ptp=false + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_create + h2_create + macvlan_create $h2 + + run_test $h1 $h2 $skip_ptp $no_unicast_flt "$h2" + + macvlan_destroy h2_destroy h1_destroy } -bridge() +test_bridge() { + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + h1_create - bridge_create + bridge_create $vlan_filtering + simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0 - ip link add link br0 name macvlan0 type macvlan mode private - ip link set macvlan0 address $MACVLAN_ADDR - ip link set macvlan0 up + run_test $h1 br0 $skip_ptp $no_unicast_flt \ + "vlan_filtering=$vlan_filtering bridge" - run_test br0 + macvlan_destroy + simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64 + bridge_destroy + h1_destroy +} - ip link del macvlan0 +vlan_unaware_bridge() +{ + test_bridge 0 +} + +vlan_aware_bridge() +{ + test_bridge 1 +} + +test_vlan() +{ + local no_unicast_flt=true + local skip_ptp=false + + if [ $(has_unicast_flt $h2) = yes ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + macvlan_create $h2.100 + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt "VLAN upper" + + macvlan_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_bridged_port() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=false + + # br_manage_promisc() will not force a single vlan_filtering port to + # promiscuous mode, so we should still expect unicast filtering to take + # place if the device can do it. 
+ if [ $(has_unicast_flt $h2) = yes ] && [ $vlan_filtering = 1 ]; then + no_unicast_flt=false + fi + + h1_vlan_create + h2_vlan_create + bridge_create $vlan_filtering + macvlan_create $h2.100 + + run_test $h1.100 $h2.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridged port" + + macvlan_destroy bridge_destroy - h1_destroy + h2_vlan_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridged_port() +{ + vlan_over_bridged_port 0 +} + +vlan_over_vlan_aware_bridged_port() +{ + vlan_over_bridged_port 1 +} + +vlan_over_bridge() +{ + local no_unicast_flt=true + local vlan_filtering=$1 + local skip_ptp=true + + h1_vlan_create + bridge_create $vlan_filtering + simple_if_init br0 + vlan_create br0 100 vbr0 $H2_IPV4/24 $H2_IPV6/64 + macvlan_create br0.100 + + if [ $vlan_filtering = 1 ]; then + bridge vlan add dev $h2 vid 100 master + bridge vlan add dev br0 vid 100 self + fi + + run_test $h1.100 br0.100 $skip_ptp $no_unicast_flt \ + "VLAN over vlan_filtering=$vlan_filtering bridge" + + if [ $vlan_filtering = 1 ]; then + bridge vlan del dev br0 vid 100 self + bridge vlan del dev $h2 vid 100 master + fi + + macvlan_destroy + vlan_destroy br0 100 + simple_if_fini br0 + bridge_destroy + h1_vlan_destroy +} + +vlan_over_vlan_unaware_bridge() +{ + vlan_over_bridge 0 +} + +vlan_over_vlan_aware_bridge() +{ + vlan_over_bridge 1 } cleanup() { pre_cleanup + + ip link set $h2 down + ip link set $h1 down + vrf_cleanup } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index fe4d7c906a70..8d4ae6c952a1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -49,6 +49,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh @@ -237,7 +238,7 @@ test_lag_slave() ip neigh flush dev br1 setup_wait_dev $up_dev 
setup_wait_dev $host_dev - $ARPING -I br1 192.0.2.130 -qfc 1 + $ARPING -I br1 -qfc 1 192.0.2.130 sleep 2 mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 1261e6f46e34..ff7049582d35 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -53,6 +53,7 @@ ALL_TESTS=" test_mirror_gretap_second " +REQUIRE_TEAMD="yes" NUM_NETIFS=6 source lib.sh source mirror_lib.sh diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index 1b902cc579f6..a21c771908b3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -196,7 +196,7 @@ test_span_gre_forbidden_egress() bridge vlan add dev $swp3 vid 555 # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev @@ -290,7 +290,7 @@ test_span_gre_fdb_roaming() bridge fdb del dev $swp2 $h3mac vlan 555 master 2>/dev/null # Re-prime FDB - $ARPING -I br1.555 192.0.2.130 -fqc 1 + $ARPING -I br1.555 -fqc 1 192.0.2.130 sleep 1 quick_test_span_gre_dir $tundev diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh index af3b398d13f0..694ece9ba3a7 100755 --- a/tools/testing/selftests/net/forwarding/no_forwarding.sh +++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh @@ -202,7 +202,7 @@ one_bridge_two_pvids() ip link set $swp2 master br0 bridge vlan add dev $swp1 vid 1 pvid untagged - bridge vlan add dev $swp1 vid 2 pvid untagged + bridge vlan add dev $swp2 vid 2 pvid untagged run_test "Switch ports in VLAN-aware bridge with different PVIDs" @@ -233,6 +233,9 
@@ cleanup() { pre_cleanup + ip link set dev $swp2 down + ip link set dev $swp1 down + h2_destroy h1_destroy diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? 
"Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh index e064b946e821..16583a470ec3 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh @@ -109,6 +109,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 " +REQUIRE_TEAMD="yes" NUM_NETIFS=8 source lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh index f05ffe213c46..2a4cd1af1b85 100755 --- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh +++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh @@ -76,6 +76,7 @@ ping_ipv4 ping_ipv6 "} +REQUIRE_TEAMD="yes" NUM_NETIFS=8 : ${lib_dir:=.} source $lib_dir/lib.sh diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh index 2ba44247c60a..a7d8399c8d4f 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh @@ -40,6 +40,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test ping_ipv4_blackhole ping_ipv6_blackhole nh_stats_test_v4 @@ -226,9 +227,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -242,7 +245,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 
192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -258,9 +262,11 @@ multipath4_test() multipath6_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -275,7 +281,8 @@ multipath6_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -313,6 +320,23 @@ multipath_test() multipath6_test "Weighted MP 11:45" 11 45 } +multipath16_test() +{ + check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + multipath4_test "65535:65535" 65535 65535 + multipath4_test "128:512" 128 512 + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + log_info "Running 16-bit IPv6 multipath tests" + multipath6_test "65535:65535" 65535 65535 + multipath6_test "128:512" 128 512 + omit_on_slow \ + multipath6_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 +} + ping_ipv4_blackhole() { RET=0 diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh index 2903294d8bca..507b2852dabe 100644 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh @@ -117,3 +117,16 @@ __nh_stats_test_v6() $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2 sysctl_restore net.ipv6.fib_multipath_hash_policy } + +check_nhgw16() +{ + local nhid=$1; shift + + ip nexthop replace id 9999 group "$nhid,65535" &>/dev/null + if (( $? 
)); then + log_test_skip "16-bit multipath tests" \ + "iproute2 or the kernel do not support 16-bit next hop weights" + return 1 + fi + ip nexthop del id 9999 ||: +} diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh index cd9e346436fc..88ddae05b39d 100755 --- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh +++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh @@ -40,6 +40,7 @@ ALL_TESTS=" ping_ipv4 ping_ipv6 multipath_test + multipath16_test nh_stats_test_v4 nh_stats_test_v6 " @@ -228,9 +229,11 @@ routing_nh_obj() multipath4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -243,7 +246,8 @@ multipath4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -258,9 +262,11 @@ multipath4_test() multipath6_l4_test() { - local desc="$1" - local weight_rp12=$2 - local weight_rp13=$3 + local desc=$1; shift + local weight_rp12=$1; shift + local weight_rp13=$1; shift + local ports=${1-sp=1024,dp=0-32768}; shift + local t0_rp12 t0_rp13 t1_rp12 t1_rp13 local packets_rp12 packets_rp13 @@ -273,7 +279,8 @@ multipath6_l4_test() t0_rp13=$(link_stats_tx_packets_get $rp13) $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ - -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + -d $MZ_DELAY -t udp "$ports" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -371,6 +378,41 @@ multipath_test() ip nexthop replace id 106 group 104,1/105,1 type resilient } +multipath16_test() +{ 
+ check_nhgw16 104 || return + + log_info "Running 16-bit IPv4 multipath tests" + ip nexthop replace id 103 group 101/102 type resilient idle_timer 0 + + ip nexthop replace id 103 group 101,65535/102,65535 type resilient + multipath4_test "65535:65535" 65535 65535 + + ip nexthop replace id 103 group 101,128/102,512 type resilient + multipath4_test "128:512" 128 512 + + ip nexthop replace id 103 group 101,255/102,65535 type resilient + omit_on_slow \ + multipath4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 103 group 101,1/102,1 type resilient + + log_info "Running 16-bit IPv6 L4 hash multipath tests" + ip nexthop replace id 106 group 104/105 type resilient idle_timer 0 + + ip nexthop replace id 106 group 104,65535/105,65535 type resilient + multipath6_l4_test "65535:65535" 65535 65535 + + ip nexthop replace id 106 group 104,128/105,512 type resilient + multipath6_l4_test "128:512" 128 512 + + ip nexthop replace id 106 group 104,255/105,65535 type resilient + omit_on_slow \ + multipath6_l4_test "255:65535" 255 65535 sp=1024-1026,dp=0-65535 + + ip nexthop replace id 106 group 104,1/105,1 type resilient +} + nh_stats_test_v4() { __nh_stats_test_v4 resilient diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh index 5a58b1ec8aef..83e52abdbc2e 100755 --- a/tools/testing/selftests/net/forwarding/router_multicast.sh +++ b/tools/testing/selftests/net/forwarding/router_multicast.sh @@ -33,10 +33,6 @@ NUM_NETIFS=6 source lib.sh source tc_common.sh -require_command $MCD -require_command $MC_CLI -table_name=selftests - h1_create() { simple_if_init $h1 198.51.100.2/28 2001:db8:1::2/64 @@ -149,25 +145,6 @@ router_destroy() ip link set dev $rp1 down } -start_mcd() -{ - SMCROUTEDIR="$(mktemp -d)" - - for ((i = 1; i <= $NUM_NETIFS; ++i)); do - echo "phyint ${NETIFS[p$i]} enable" >> \ - $SMCROUTEDIR/$table_name.conf - done - - $MCD -N -I $table_name -f 
$SMCROUTEDIR/$table_name.conf \ - -P $SMCROUTEDIR/$table_name.pid -} - -kill_mcd() -{ - pkill $MCD - rm -rf $SMCROUTEDIR -} - setup_prepare() { h1=${NETIFS[p1]} @@ -179,7 +156,7 @@ setup_prepare() rp3=${NETIFS[p5]} h3=${NETIFS[p6]} - start_mcd + adf_mcd_start || exit "$EXIT_STATUS" vrf_prepare @@ -206,7 +183,7 @@ cleanup() vrf_cleanup - kill_mcd + defer_scopes_cleanup } create_mcast_sg() @@ -214,9 +191,9 @@ create_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name add $if_name $s_addr $mcast $dest_ifs + mc_cli add "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } delete_mcast_sg() @@ -224,9 +201,9 @@ delete_mcast_sg() local if_name=$1; shift local s_addr=$1; shift local mcast=$1; shift - local dest_ifs=${@} + local dest_ifs=("${@}") - $MC_CLI -I $table_name remove $if_name $s_addr $mcast $dest_ifs + mc_cli remove "$if_name" "$s_addr" "$mcast" "${dest_ifs[@]}" } mcast_v4() diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh index e2be354167a1..46f365b557b7 100755 --- a/tools/testing/selftests/net/forwarding/router_multipath.sh +++ b/tools/testing/selftests/net/forwarding/router_multipath.sh @@ -180,6 +180,7 @@ multipath4_test() ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) @@ -217,6 +218,7 @@ multipath6_test() $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768" + sleep 1 t1_rp12=$(link_stats_tx_packets_get $rp12) t1_rp13=$(link_stats_tx_packets_get $rp13) diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index e60c8b4818cc..6269d5e23487 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ 
b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -11,6 +11,7 @@ ALL_TESTS=" ets_test_strict ets_test_mixed ets_test_dwrr + ets_test_plug classifier_mode ets_test_strict ets_test_mixed @@ -24,15 +25,10 @@ switch_create() # Create a bottleneck so that the DWRR process can kick in. tc qdisc add dev $swp2 root handle 1: tbf \ rate 1Gbit burst 1Mbit latency 100ms + defer tc qdisc del dev $swp2 root PARENT="parent 1:" } -switch_destroy() -{ - ets_switch_destroy - tc qdisc del dev $swp2 root -} - # Callback from sch_ets_tests.sh collect_stats() { diff --git a/tools/testing/selftests/net/forwarding/sch_ets_core.sh b/tools/testing/selftests/net/forwarding/sch_ets_core.sh index f906fcc66572..0453210271dc 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_core.sh @@ -165,45 +165,31 @@ h1_create() { local i; - simple_if_init $h1 + adf_simple_if_init $h1 + mtu_set $h1 9900 + defer mtu_restore $h1 + for i in {0..2}; do vlan_create $h1 1$i v$h1 $(sip $i)/28 + defer vlan_destroy $h1 1$i ip link set dev $h1.1$i type vlan egress 0:$i done } -h1_destroy() -{ - local i - - for i in {0..2}; do - vlan_destroy $h1 1$i - done - mtu_restore $h1 - simple_if_fini $h1 -} - h2_create() { local i - simple_if_init $h2 - mtu_set $h2 9900 - for i in {0..2}; do - vlan_create $h2 1$i v$h2 $(dip $i)/28 - done -} + adf_simple_if_init $h2 -h2_destroy() -{ - local i + mtu_set $h2 9900 + defer mtu_restore $h2 for i in {0..2}; do - vlan_destroy $h2 1$i + vlan_create $h2 1$i v$h2 $(dip $i)/28 + defer vlan_destroy $h2 1$i done - mtu_restore $h2 - simple_if_fini $h2 } ets_switch_create() @@ -211,44 +197,45 @@ ets_switch_create() local i ip link set dev $swp1 up + defer ip link set dev $swp1 down + mtu_set $swp1 9900 + defer mtu_restore $swp1 ip link set dev $swp2 up + defer ip link set dev $swp2 down + mtu_set $swp2 9900 + defer mtu_restore $swp2 for i in {0..2}; do vlan_create $swp1 1$i + defer vlan_destroy $swp1 1$i ip link set dev 
$swp1.1$i type vlan ingress 0:0 1:1 2:2 vlan_create $swp2 1$i + defer vlan_destroy $swp2 1$i ip link add dev br1$i type bridge + defer ip link del dev br1$i + ip link set dev $swp1.1$i master br1$i + defer ip link set dev $swp1.1$i nomaster + ip link set dev $swp2.1$i master br1$i + defer ip link set dev $swp2.1$i nomaster ip link set dev br1$i up - ip link set dev $swp1.1$i up - ip link set dev $swp2.1$i up - done -} + defer ip link set dev br1$i down -ets_switch_destroy() -{ - local i - - ets_delete_qdisc + ip link set dev $swp1.1$i up + defer ip link set dev $swp1.1$i down - for i in {0..2}; do - ip link del dev br1$i - vlan_destroy $swp2 1$i - vlan_destroy $swp1 1$i + ip link set dev $swp2.1$i up + defer ip link set dev $swp2.1$i down done - mtu_restore $swp2 - ip link set dev $swp2 down - - mtu_restore $swp1 - ip link set dev $swp1 down + defer ets_delete_qdisc } setup_prepare() @@ -262,24 +249,13 @@ setup_prepare() put=$swp2 hut=$h2 - vrf_prepare + adf_vrf_prepare h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(dip 0) " vlan 10" diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index f9d26a7911bb..79d837a2868a 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -90,6 +90,7 @@ __ets_dwrr_test() for stream in ${streams[@]}; do ets_start_traffic $stream + defer stop_traffic $! 
done sleep 10 @@ -120,25 +121,24 @@ __ets_dwrr_test() ${d[0]} ${d[$i]} fi done - - for stream in ${streams[@]}; do - stop_traffic - done } ets_dwrr_test_012() { - __ets_dwrr_test 0 1 2 + in_defer_scope \ + __ets_dwrr_test 0 1 2 } ets_dwrr_test_01() { - __ets_dwrr_test 0 1 + in_defer_scope \ + __ets_dwrr_test 0 1 } ets_dwrr_test_12() { - __ets_dwrr_test 1 2 + in_defer_scope \ + __ets_dwrr_test 1 2 } ets_qdisc_setup() @@ -224,3 +224,11 @@ ets_test_dwrr() ets_set_dwrr_two_bands xfail_on_slow ets_dwrr_test_01 } + +ets_test_plug() +{ + ets_change_qdisc $put 2 "3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3" "1514 1514" + tc qdisc add dev $put handle 20: parent 10:4 plug + start_traffic_pktsize 100 $h1.10 192.0.2.1 192.0.2.2 00:c1:a0:c1:a0:00 "-c 1" + ets_qdisc_setup $put 2 +} diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh index 17f28644568e..f2a3d9254642 100755 --- a/tools/testing/selftests/net/forwarding/sch_red.sh +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -52,72 +52,61 @@ PKTSZ=1400 h1_create() { - simple_if_init $h1 192.0.2.1/28 + adf_simple_if_init $h1 192.0.2.1/28 + mtu_set $h1 10000 + defer mtu_restore $h1 + tc qdisc replace dev $h1 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M -} - -h1_destroy() -{ - tc qdisc del dev $h1 root - mtu_restore $h1 - simple_if_fini $h1 192.0.2.1/28 + defer tc qdisc del dev $h1 root } h2_create() { - simple_if_init $h2 192.0.2.2/28 - mtu_set $h2 10000 -} + adf_simple_if_init $h2 192.0.2.2/28 -h2_destroy() -{ - mtu_restore $h2 - simple_if_fini $h2 192.0.2.2/28 + mtu_set $h2 10000 + defer mtu_restore $h2 } h3_create() { - simple_if_init $h3 192.0.2.3/28 - mtu_set $h3 10000 -} + adf_simple_if_init $h3 192.0.2.3/28 -h3_destroy() -{ - mtu_restore $h3 - simple_if_fini $h3 192.0.2.3/28 + mtu_set $h3 10000 + defer mtu_restore $h3 } switch_create() { ip link add dev br up type bridge + defer ip link del dev br + ip link set dev $swp1 up master br + defer ip link set dev 
$swp1 down nomaster + + ip link set dev $swp2 up master br + defer ip link set dev $swp2 down nomaster + + ip link set dev $swp3 up master br + defer ip link set dev $swp3 down nomaster mtu_set $swp1 10000 + defer mtu_restore $swp1 + mtu_set $swp2 10000 + defer mtu_restore $swp2 + mtu_set $swp3 10000 + defer mtu_restore $swp3 tc qdisc replace dev $swp3 root handle 1: tbf \ rate 10Mbit burst 10K limit 1M - ip link add name _drop_test up type dummy -} + defer tc qdisc del dev $swp3 root -switch_destroy() -{ - ip link del dev _drop_test - tc qdisc del dev $swp3 root - - mtu_restore $h3 - mtu_restore $h2 - mtu_restore $h1 - - ip link set dev $swp3 down nomaster - ip link set dev $swp2 down nomaster - ip link set dev $swp1 down nomaster - ip link del dev br + ip link add name _drop_test up type dummy + defer ip link del dev _drop_test } setup_prepare() @@ -133,7 +122,7 @@ setup_prepare() h3_mac=$(mac_get $h3) - vrf_prepare + adf_vrf_prepare h1_create h2_create @@ -141,18 +130,6 @@ setup_prepare() switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h3_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1 192.0.2.3 " from host 1" @@ -287,6 +264,7 @@ do_ecn_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -298,9 +276,6 @@ do_ecn_test() build_backlog $((2 * limit)) udp >/dev/null check_fail $? "UDP traffic went into backlog instead of being early-dropped" log_test "$name backlog > limit: UDP early-dropped" - - stop_traffic - sleep 1 } do_ecn_nodrop_test() @@ -310,6 +285,7 @@ do_ecn_nodrop_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! sleep 1 ecn_test_common "$name" $limit @@ -321,9 +297,6 @@ do_ecn_nodrop_test() build_backlog $((2 * limit)) udp >/dev/null check_err $? 
"UDP traffic was early-dropped instead of getting into backlog" log_test "$name backlog > limit: UDP not dropped" - - stop_traffic - sleep 1 } do_red_test() @@ -336,6 +309,7 @@ do_red_test() # is above limit. $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! # Pushing below the queue limit should work. RET=0 @@ -352,9 +326,6 @@ do_red_test() pct=$(check_marking "== 0") check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." log_test "RED backlog > limit" - - stop_traffic - sleep 1 } do_red_qevent_test() @@ -369,6 +340,7 @@ do_red_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t udp -q & + defer stop_traffic $! sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -396,9 +368,6 @@ do_red_qevent_test() check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" log_test "RED early_dropped packets mirrored" - - stop_traffic - sleep 1 } do_ecn_qevent_test() @@ -410,6 +379,7 @@ do_ecn_qevent_test() $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ -a own -b $h3_mac -t tcp -q tos=0x01 & + defer stop_traffic $! 
sleep 1 tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ @@ -428,9 +398,6 @@ do_ecn_qevent_test() tc filter del block 10 pref 1234 handle 102 matchall log_test "ECN marked packets mirrored" - - stop_traffic - sleep 1 } install_qdisc() @@ -451,36 +418,36 @@ uninstall_qdisc() ecn_test() { install_qdisc ecn + defer uninstall_qdisc xfail_on_slow do_ecn_test $BACKLOG - uninstall_qdisc } ecn_nodrop_test() { install_qdisc ecn nodrop + defer uninstall_qdisc xfail_on_slow do_ecn_nodrop_test $BACKLOG - uninstall_qdisc } red_test() { install_qdisc + defer uninstall_qdisc xfail_on_slow do_red_test $BACKLOG - uninstall_qdisc } red_qevent_test() { install_qdisc qevent early_drop block 10 + defer uninstall_qdisc xfail_on_slow do_red_qevent_test $BACKLOG - uninstall_qdisc } ecn_qevent_test() { install_qdisc ecn qevent mark block 10 + defer uninstall_qdisc xfail_on_slow do_ecn_qevent_test $BACKLOG - uninstall_qdisc } trap cleanup EXIT diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh index 9cd884d4a5de..070c17faa9e4 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh @@ -59,69 +59,65 @@ host_create() local dev=$1; shift local host=$1; shift - simple_if_init $dev + adf_simple_if_init $dev + mtu_set $dev 10000 + defer mtu_restore $dev vlan_create $dev 10 v$dev $(ipaddr $host 10)/28 + defer vlan_destroy $dev 10 ip link set dev $dev.10 type vlan egress 0:0 vlan_create $dev 11 v$dev $(ipaddr $host 11)/28 + defer vlan_destroy $dev 11 ip link set dev $dev.11 type vlan egress 0:1 } -host_destroy() -{ - local dev=$1; shift - - vlan_destroy $dev 11 - vlan_destroy $dev 10 - mtu_restore $dev - simple_if_fini $dev -} - h1_create() { host_create $h1 1 } -h1_destroy() -{ - host_destroy $h1 -} - h2_create() { host_create $h2 2 tc qdisc add dev $h2 clsact + defer tc qdisc del dev $h2 clsact + tc filter add dev $h2 ingress pref 1010 
prot 802.1q \ flower $TCFLAGS vlan_id 10 action pass tc filter add dev $h2 ingress pref 1011 prot 802.1q \ flower $TCFLAGS vlan_id 11 action pass } -h2_destroy() -{ - tc qdisc del dev $h2 clsact - host_destroy $h2 -} - switch_create() { local intf local vlan ip link add dev br10 type bridge + defer ip link del dev br10 + ip link add dev br11 type bridge + defer ip link del dev br11 for intf in $swp1 $swp2; do ip link set dev $intf up + defer ip link set dev $intf down + mtu_set $intf 10000 + defer mtu_restore $intf for vlan in 10 11; do vlan_create $intf $vlan + defer vlan_destroy $intf $vlan + ip link set dev $intf.$vlan master br$vlan + defer ip link set dev $intf.$vlan nomaster + ip link set dev $intf.$vlan up + defer ip link set dev $intf.$vlan down done done @@ -130,34 +126,10 @@ switch_create() done ip link set dev br10 up - ip link set dev br11 up -} - -switch_destroy() -{ - local intf - local vlan - - # A test may have been interrupted mid-run, with Qdisc installed. Delete - # it here. 
- tc qdisc del dev $swp2 root 2>/dev/null - - ip link set dev br11 down - ip link set dev br10 down + defer ip link set dev br10 down - for intf in $swp2 $swp1; do - for vlan in 11 10; do - ip link set dev $intf.$vlan down - ip link set dev $intf.$vlan nomaster - vlan_destroy $intf $vlan - done - - mtu_restore $intf - ip link set dev $intf down - done - - ip link del dev br11 - ip link del dev br10 + ip link set dev br11 up + defer ip link set dev br11 down } setup_prepare() @@ -176,24 +148,13 @@ setup_prepare() h2_mac=$(mac_get $h2) - vrf_prepare + adf_vrf_prepare h1_create h2_create switch_create } -cleanup() -{ - pre_cleanup - - switch_destroy - h2_destroy - h1_destroy - - vrf_cleanup -} - ping_ipv4() { ping_test $h1.10 $(ipaddr 2 10) " vlan 10" @@ -207,18 +168,18 @@ tbf_get_counter() tc_rule_stats_get $h2 10$vlan ingress .bytes } -do_tbf_test() +__tbf_test() { local vlan=$1; shift local mbit=$1; shift start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 2 $vlan) $h2_mac + defer stop_traffic $! sleep 5 # Wait for the burst to dwindle local t2=$(busywait_for_counter 1000 +1 tbf_get_counter $vlan) sleep 10 local t3=$(tbf_get_counter $vlan) - stop_traffic RET=0 @@ -231,3 +192,9 @@ do_tbf_test() log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit" } + +do_tbf_test() +{ + in_defer_scope \ + __tbf_test "$@" +} diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh index df9bcd6a811a..c182a04282bc 100644 --- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh @@ -30,8 +30,9 @@ tbf_test() # This test is used for both ETS and PRIO. Even though we only need two # bands, PRIO demands a minimum of three. 
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0 + defer tc qdisc del dev $swp2 root + tbf_test_one 128K - tc qdisc del dev $swp2 root } tbf_root_test() @@ -42,6 +43,8 @@ tbf_root_test() tc qdisc replace dev $swp2 root handle 1: \ tbf rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + tc qdisc replace dev $swp2 parent 1:1 handle 10: \ $QDISC 3 priomap 2 1 0 tc qdisc replace dev $swp2 parent 10:3 handle 103: \ @@ -53,8 +56,6 @@ tbf_root_test() do_tbf_test 10 400 $bs do_tbf_test 11 400 $bs - - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh index 96c997be0d03..9f20320f8d84 100755 --- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh +++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh @@ -14,13 +14,14 @@ tbf_test_one() tc qdisc replace dev $swp2 root handle 108: tbf \ rate 400Mbit burst $bs limit 1M + defer tc qdisc del dev $swp2 root + do_tbf_test 10 400 $bs } tbf_test() { tbf_test_one 128K - tc qdisc del dev $swp2 root } if type -t sch_tbf_pre_hook >/dev/null; then diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh index 589629636502..ea89e558672d 100755 --- a/tools/testing/selftests/net/forwarding/tc_actions.sh +++ b/tools/testing/selftests/net/forwarding/tc_actions.sh @@ -4,7 +4,8 @@ ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \ mirred_egress_mirror_test matchall_mirred_egress_mirror_test \ gact_trap_test mirred_egress_to_ingress_test \ - mirred_egress_to_ingress_tcp_test" + mirred_egress_to_ingress_tcp_test \ + ingress_2nd_vlan_push egress_2nd_vlan_push" NUM_NETIFS=4 source tc_common.sh source lib.sh @@ -244,6 +245,49 @@ mirred_egress_to_ingress_tcp_test() log_test "mirred_egress_to_ingress_tcp ($tcflags)" } +ingress_2nd_vlan_push() +{ + tc filter add dev $swp1 ingress pref 20 
chain 0 handle 20 flower \ + $tcflags num_of_vlans 1 \ + action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $swp1 ingress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -Q 10 -q + + tc_check_packets "dev $swp1 ingress" 30 1 + check_err $? "No double-vlan packets received" + + tc filter del dev $swp1 ingress pref 20 chain 0 handle 20 flower + tc filter del dev $swp1 ingress pref 30 chain 5 handle 30 flower + + log_test "ingress_2nd_vlan_push ($tcflags)" +} + +egress_2nd_vlan_push() +{ + tc filter add dev $h1 egress pref 20 chain 0 handle 20 flower \ + $tcflags num_of_vlans 0 \ + action vlan push id 10 protocol 0x8100 \ + pipe action vlan push id 100 protocol 0x8100 action goto chain 5 + tc filter add dev $h1 egress pref 30 chain 5 handle 30 flower \ + $tcflags num_of_vlans 2 \ + cvlan_ethtype 0x800 action pass + + $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \ + -t ip -q + + tc_check_packets "dev $h1 egress" 30 1 + check_err $? 
"No double-vlan packets received" + + tc filter del dev $h1 egress pref 20 chain 0 handle 20 flower + tc filter del dev $h1 egress pref 30 chain 5 handle 30 flower + + log_test "egress_2nd_vlan_push ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh index b1daad19b01e..b58909a93112 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower.sh @@ -6,7 +6,7 @@ ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \ match_ip_tos_test match_indev_test match_ip_ttl_test match_mpls_label_test \ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \ - match_mpls_lse_test" + match_mpls_lse_test match_erspan_opts_test" NUM_NETIFS=2 source tc_common.sh source lib.sh @@ -676,6 +676,56 @@ match_mpls_lse_test() log_test "mpls lse match ($tcflags)" } +match_erspan_opts_test() +{ + RET=0 + + check_tc_erspan_support $h2 || return 0 + + # h1 erspan setup + tunnel_create erspan1 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1001 \ + tos C ttl 64 erspan_ver 1 erspan 6789 # ERSPAN Type II + tunnel_create erspan2 erspan 192.0.2.1 192.0.2.2 dev $h1 seq key 1002 \ + tos C ttl 64 erspan_ver 2 erspan_dir egress erspan_hwid 63 \ + # ERSPAN Type III + ip link set dev erspan1 master v$h1 + ip link set dev erspan2 master v$h1 + # h2 erspan setup + ip link add ep-ex type erspan ttl 64 external # To collect tunnel info + ip link set ep-ex up + ip link set dev ep-ex master v$h2 + tc qdisc add dev ep-ex clsact + + # ERSPAN Type II [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 101 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1001 erspan_opts 1:6789:0:0 \ + action drop + # ERSPAN Type III [decap direction] + tc filter add dev ep-ex ingress protocol ip handle 102 flower \ + $tcflags enc_src_ip 192.0.2.1 enc_dst_ip 192.0.2.2 \ + enc_key_id 1002 
erspan_opts 2:0:1:63 action drop + + ep1mac=$(mac_get erspan1) + $MZ erspan1 -c 1 -p 64 -a $ep1mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 101 1 + check_err $? "ERSPAN Type II" + + ep2mac=$(mac_get erspan2) + $MZ erspan2 -c 1 -p 64 -a $ep2mac -b $h2mac -t ip -q + tc_check_packets "dev ep-ex ingress" 102 1 + check_err $? "ERSPAN Type III" + + # h2 erspan cleanup + tc qdisc del dev ep-ex clsact + tunnel_destroy ep-ex + # h1 erspan cleanup + tunnel_destroy erspan2 # ERSPAN Type III + tunnel_destroy erspan1 # ERSPAN Type II + + log_test "erspan_opts match ($tcflags)" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh index 3885a2a91f7d..baed5e380dae 100755 --- a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh +++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh @@ -20,6 +20,7 @@ ALL_TESTS=" test_port_range_ipv4_tcp test_port_range_ipv6_udp test_port_range_ipv6_tcp + test_port_range_ipv4_udp_drop " NUM_NETIFS=4 @@ -194,6 +195,51 @@ test_port_range_ipv6_tcp() __test_port_range $proto $ip_proto $sip $dip $mode "$name" } +test_port_range_ipv4_udp_drop() +{ + local proto=ipv4 + local ip_proto=udp + local sip=192.0.2.1 + local dip=192.0.2.2 + local mode="-4" + local name="IPv4 UDP Drop" + local dmac=$(mac_get $h2) + local smac=$(mac_get $h1) + local sport_min=2000 + local sport_max=3000 + local sport_mid=$((sport_min + (sport_max - sport_min) / 2)) + local dport=5000 + + RET=0 + + tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \ + flower src_ip $sip dst_ip $dip ip_proto $ip_proto \ + src_port $sport_min-$sport_max \ + dst_port $dport \ + action drop + + # Test ports outside range - should pass + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t 
$ip_proto "sp=$((sport_max + 1)),dp=$dport" + + # Test ports inside range - should be dropped + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_min,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_mid,dp=$dport" + $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \ + -t $ip_proto "sp=$sport_max,dp=$dport" + + tc_check_packets "dev $swp1 ingress" 101 3 + check_err $? "Filter did not drop the expected number of packets" + + tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower + + log_test "Port range matching - $name" +} + setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh index 5103f64a71d6..509fdedfcfa1 100755 --- a/tools/testing/selftests/net/forwarding/tc_police.sh +++ b/tools/testing/selftests/net/forwarding/tc_police.sh @@ -148,7 +148,7 @@ police_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } @@ -198,7 +198,7 @@ police_shared_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% } police_shared_test() @@ -278,7 +278,7 @@ police_mirror_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower @@ -320,7 +320,7 @@ police_pps_common_test() log_test "$test_name" - { kill %% && wait %%; } 2>/dev/null + kill_process %% tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower } diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh new file mode 100755 index 000000000000..8992aeabfe0b 
--- /dev/null +++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh @@ -0,0 +1,421 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" \ + test_clock_jump_backward \ + test_taprio_after_ptp \ + test_max_sdu \ + test_clock_jump_backward_forward \ +" +NUM_NETIFS=4 +source tc_common.sh +source lib.sh +source tsn_lib.sh + +require_command python3 + +# The test assumes the usual topology from the README, where h1 is connected to +# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge. +# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2. +# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to +# swp1 (and both to CLOCK_REALTIME). +h1=${NETIFS[p1]} +swp1=${NETIFS[p2]} +swp2=${NETIFS[p3]} +h2=${NETIFS[p4]} + +UDS_ADDRESS_H1="/var/run/ptp4l_h1" +UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1" + +H1_IPV4="192.0.2.1" +H2_IPV4="192.0.2.2" +H1_IPV6="2001:db8:1::1" +H2_IPV6="2001:db8:1::2" + +# Tunables +NUM_PKTS=100 +STREAM_VID=10 +STREAM_PRIO_1=6 +STREAM_PRIO_2=5 +STREAM_PRIO_3=4 +# PTP uses TC 0 +ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2)) +# Use a conservative cycle of 10 ms to allow the test to still pass when the +# kernel has some extra overhead like lockdep etc +CYCLE_TIME_NS=10000000 +# Create two Gate Control List entries, one OPEN and one CLOSE, of equal +# durations +GATE_DURATION_NS=$((CYCLE_TIME_NS / 2)) +# Give 2/3 of the cycle time to user space and 1/3 to the kernel +FUDGE_FACTOR=$((CYCLE_TIME_NS / 3)) +# Shift the isochron base time by half the gate time, so that packets are +# always received by swp1 close to the middle of the time slot, to minimize +# inaccuracies due to network sync +SHIFT_TIME_NS=$((GATE_DURATION_NS / 2)) + +path_delay= + +h1_create() +{ + simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h1_destroy() +{ + simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64 +} + +h2_create() +{ + simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64 +} + +h2_destroy() +{ + simple_if_fini $h2 
$H2_IPV4/24 $H2_IPV6/64 +} + +switch_create() +{ + local h2_mac_addr=$(mac_get $h2) + + ip link set $swp1 up + ip link set $swp2 up + + ip link add br0 type bridge vlan_filtering 1 + ip link set $swp1 master br0 + ip link set $swp2 master br0 + ip link set br0 up + + bridge vlan add dev $swp2 vid $STREAM_VID + bridge vlan add dev $swp1 vid $STREAM_VID + bridge fdb add dev $swp2 \ + $h2_mac_addr vlan $STREAM_VID static master +} + +switch_destroy() +{ + ip link del br0 +} + +ptp_setup() +{ + # Set up swp1 as a master PHC for h1, synchronized to the local + # CLOCK_REALTIME. + phc2sys_start $UDS_ADDRESS_SWP1 + ptp4l_start $h1 true $UDS_ADDRESS_H1 + ptp4l_start $swp1 false $UDS_ADDRESS_SWP1 +} + +ptp_cleanup() +{ + ptp4l_stop $swp1 + ptp4l_stop $h1 + phc2sys_stop +} + +txtime_setup() +{ + local if_name=$1 + + tc qdisc add dev $if_name clsact + # Classify PTP on TC 7 and isochron on TC 6 + tc filter add dev $if_name egress protocol 0x88f7 \ + flower action skbedit priority 7 + tc filter add dev $if_name egress protocol 802.1Q \ + flower vlan_ethtype 0xdead action skbedit priority 6 + tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + hw 1 + # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1. + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR + tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \ + clockid CLOCK_TAI offload delta $FUDGE_FACTOR +} + +txtime_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name clsact + tc qdisc del dev $if_name root +} + +taprio_replace() +{ + local if_name="$1"; shift + local extra_args="$1"; shift + + # STREAM_PRIO_1 always has an open gate. 
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time) + # STREAM_PRIO_3 always has a closed gate. + tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \ + map 0 1 2 3 4 5 6 7 \ + sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \ + sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \ + base-time 0 flags 0x2 $extra_args + taprio_wait_for_admin $if_name +} + +taprio_cleanup() +{ + local if_name=$1 + + tc qdisc del dev $if_name root +} + +probe_path_delay() +{ + local isochron_dat="$(mktemp)" + local received + + log_info "Probing path delay" + + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \ + "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \ + "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + if [ "$received" != "$NUM_PKTS" ]; then + echo "Cannot establish basic data path between $h1 and $h2" + exit $ksft_fail + fi + + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(np.max(w))) + EOF + path_delay=$(python3 ./isochron_postprocess.py) + + log_info "Path delay from $h1 to $h2 estimated at $path_delay ns" + + if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then + echo "Path delay larger than gate duration, aborting" + exit $ksft_fail + fi + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +setup_prepare() +{ + vrf_prepare + + h1_create + h2_create + switch_create + + txtime_setup $h1 + + # Temporarily set up PTP just to probe the end-to-end path delay. 
+ ptp_setup + probe_path_delay + ptp_cleanup +} + +cleanup() +{ + pre_cleanup + + isochron_recv_stop + txtime_cleanup $h1 + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +run_test() +{ + local base_time=$1; shift + local stream_prio=$1; shift + local expected_delay=$1; shift + local should_fail=$1; shift + local test_name=$1; shift + local isochron_dat="$(mktemp)" + local received + local median_delay + + RET=0 + + # Set the shift time equal to the cycle time, which effectively + # cancels the default advance time. Packets won't be sent early in + # software, which ensures that they won't prematurely enter through + # the open gate in __test_out_of_band(). Also, the gate is open for + # long enough that this won't cause a problem in __test_in_band(). + isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \ + "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \ + "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat" + + received=$(isochron_report_num_received "$isochron_dat") + [ "$received" = "$NUM_PKTS" ] + check_err_fail $should_fail $? "Reception of $NUM_PKTS packets" + + if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then + printf "pdelay = {}\n" > isochron_data.py + isochron report --input-file "$isochron_dat" \ + --printf-format "pdelay[%u] = %d - %d\n" \ + --printf-args "qRT" \ + >> isochron_data.py + cat <<-'EOF' > isochron_postprocess.py + #!/usr/bin/env python3 + + from isochron_data import pdelay + import numpy as np + + w = np.array(list(pdelay.values())) + print("{}".format(int(np.median(w)))) + EOF + median_delay=$(python3 ./isochron_postprocess.py) + + # If the condition below is true, packets were delayed by a closed gate + [ "$median_delay" -gt $((path_delay + expected_delay)) ] + check_fail $? 
"Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay" + + # If the condition below is true, packets were sent expecting them to + # hit a closed gate in the switch, but were not delayed + [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ] + check_fail $? "Median delay $median_delay is less than expected delay $expected_delay" + fi + + log_test "$test_name" + + rm -f ./isochron_data.py 2> /dev/null + rm -f ./isochron_postprocess.py 2> /dev/null + rm -f "$isochron_dat" 2> /dev/null +} + +__test_always_open() +{ + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open" +} + +__test_always_closed() +{ + run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed" +} + +__test_in_band() +{ + # Send packets in-band with the OPEN gate entry + run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate" +} + +__test_out_of_band() +{ + # Send packets in-band with the CLOSE gate entry + run_test 0.005000000 $STREAM_PRIO_2 \ + $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \ + "Out of band with gate" +} + +run_subtests() +{ + __test_always_open + __test_always_closed + __test_in_band + __test_out_of_band +} + +test_taprio_after_ptp() +{ + log_info "Setting up taprio after PTP" + ptp_setup + taprio_replace $swp2 + run_subtests + taprio_cleanup $swp2 + ptp_cleanup +} + +__test_under_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0" + run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU" +} + +__test_over_max_sdu() +{ + # Limit max-sdu for STREAM_PRIO_1 + taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0" + run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU" +} + +test_max_sdu() +{ + ptp_setup + __test_under_max_sdu + __test_over_max_sdu + taprio_cleanup $swp2 + ptp_cleanup +} + +# Perform a clock jump in the past without synchronization running, so that the +# time base remains where it was set by phc_ctl. 
+test_clock_jump_backward() +{ + # This is a more complex schedule specifically crafted in a way that + # has been problematic on NXP LS1028A. Not much to test with it other + # than the fact that it passes traffic. + tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \ + queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \ + base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \ + sched-entry S 20 300000 sched-entry S 48 200000 \ + sched-entry S 20 300000 sched-entry S 83 200000 \ + sched-entry S 40 300000 sched-entry S 00 200000 flags 2 + + log_info "Forcing a backward clock jump" + phc_ctl $swp1 set 0 + + ping_test $h1 192.0.2.2 + taprio_cleanup $swp2 +} + +# Test that taprio tolerates clock jumps. +# Since ptp4l and phc2sys are running, it is expected for the time to +# eventually recover (through yet another clock jump). Isochron waits +# until that is the case. +test_clock_jump_backward_forward() +{ + log_info "Forcing a backward and a forward clock jump" + taprio_replace $swp2 + phc_ctl $swp1 set 0 + ptp_setup + ping_test $h1 192.0.2.2 + run_subtests + ptp_cleanup + taprio_cleanup $swp2 +} + +tc_offload_check +if [[ $? 
-ne 0 ]]; then + log_test_skip "Could not test offloaded functionality" + exit $EXIT_STATUS +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh index b91bcd8008a9..08c044ff6689 100644 --- a/tools/testing/selftests/net/forwarding/tsn_lib.sh +++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh @@ -2,6 +2,8 @@ # SPDX-License-Identifier: GPL-2.0 # Copyright 2021-2022 NXP +tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts + REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes} REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes} @@ -18,6 +20,7 @@ fi if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then require_command phc2sys require_command ptp4l + require_command phc_ctl fi phc2sys_start() @@ -182,6 +185,7 @@ isochron_do() local base_time=$1; shift local cycle_time=$1; shift local shift_time=$1; shift + local window_size=$1; shift local num_pkts=$1; shift local vid=$1; shift local priority=$1; shift @@ -212,6 +216,10 @@ isochron_do() extra_args="${extra_args} --shift-time=${shift_time}" fi + if ! 
[ -z "${window_size}" ]; then + extra_args="${extra_args} --window-size=${window_size}" + fi + if [ "${use_l2}" = "true" ]; then extra_args="${extra_args} --l2 --etype=0xdead ${vid}" receiver_extra_args="--l2 --etype=0xdead" @@ -247,3 +255,21 @@ isochron_do() cpufreq_restore ${ISOCHRON_CPU} } + +isochron_report_num_received() +{ + local isochron_dat=$1; shift + + # Count all received packets by looking at the non-zero RX timestamps + isochron report \ + --input-file "${isochron_dat}" \ + --printf-format "%u\n" --printf-args "R" | \ + grep -w -v '0' | wc -l +} + +taprio_wait_for_admin() +{ + local if_name="$1"; shift + + "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name" +} diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh index 3f9d50f1ef9e..b43816dd998c 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh @@ -428,6 +428,14 @@ __test_flood() test_flood() { __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood" + + # Add an entry with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). 
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self + __test_flood de:ad:be:ef:13:37 192.0.2.100 "flood, unresolved FDB entry" + bridge fdb del dev vx1 00:00:00:00:00:00 dst 198.51.100.1 self } vxlan_fdb_add_del() @@ -740,6 +748,8 @@ test_learning() vxlan_flood_test $mac $dst 0 10 0 + # The entry should age out when it only forwards traffic + $MZ $h1 -c 50 -d 1sec -p 64 -b $mac -B $dst -t icmp -q & sleep 60 bridge fdb show brport vx1 | grep $mac | grep -q self diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh index fb9a34cb50c6..afc65647f673 100755 --- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh @@ -539,6 +539,21 @@ test_flood() 10 10 0 10 0 __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 "flood vlan 20" \ 10 0 10 0 10 + + # Add entries with arbitrary destination IP. Verify that packets are + # not duplicated (this can happen if hardware floods the packets, and + # then traps them due to misconfiguration, so software data path repeats + # flooding and resends packets). 
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self + bridge fdb append dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + + __test_flood de:ad:be:ef:13:37 192.0.2.100 10 \ + "flood vlan 10, unresolved FDB entry" 10 10 0 10 0 + __test_flood ca:fe:be:ef:13:37 198.51.100.100 20 \ + "flood vlan 20, unresolved FDB entry" 10 0 10 0 10 + + bridge fdb del dev vx20 00:00:00:00:00:00 dst 203.0.113.2 self + bridge fdb del dev vx10 00:00:00:00:00:00 dst 203.0.113.1 self } vxlan_fdb_add_del() diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh new file mode 100755 index 000000000000..6a570d256e07 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_mc_ul.sh @@ -0,0 +1,766 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------------------------+ +# | + $h1.10 + $h1.20 | +# | | 192.0.2.1/28 | 2001:db8:1::1/64 | +# | \________ ________/ | +# | \ / | +# | + $h1 H1 (vrf) | +# +-----------|-----------------------------+ +# | +# +-----------|----------------------------------------------------------------+ +# | +---------|--------------------------------------+ SWITCH (main vrf) | +# | | + $swp1 BR1 (802.1q) | | +# | | vid 10 20 | | +# | | | | +# | | + vx10 (vxlan) + vx20 (vxlan) | + lo10 (dummy) | +# | | local 192.0.2.100 local 2001:db8:4::1 | 192.0.2.100/28 | +# | | group 233.252.0.1 group ff0e::1:2:3 | 2001:db8:4::1/64 | +# | | id 1000 id 2000 | | +# | | vid 10 pvid untagged vid 20 pvid untagged | | +# | +------------------------------------------------+ | +# | | +# | + $swp2 $swp3 + | +# | | 192.0.2.33/28 192.0.2.65/28 | | +# | | 2001:db8:2::1/64 2001:db8:3::1/64 | | +# | | | | +# +---|--------------------------------------------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | H2 (vrf) | | H3 (vrf) | | +# | +-|----------------------------+ | | 
+-----------------------------|-+ | +# | | + $h2 BR2 (802.1d) | | | | BR3 (802.1d) $h3 + | | +# | | | | | | | | +# | | + v1$h2 (veth) | | | | v1$h3 (veth) + | | +# | +-|----------------------------+ | | +-----------------------------|-+ | +# | | | | | | +# +---|--------------------------------+ +--------------------------------|---+ +# | | +# +---|--------------------------------+ +--------------------------------|---+ +# | + v2$h2 (veth) NS2 (netns) | | NS3 (netns) v2$h3 (veth) + | +# | 192.0.2.34/28 | | 192.0.2.66/28 | +# | 2001:db8:2::2/64 | | 2001:db8:3::2/64 | +# | | | | +# | +--------------------------------+ | | +--------------------------------+ | +# | | BR1 (802.1q) | | | | BR1 (802.1q) | | +# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | | +# | | local 192.0.2.34 | | | | local 192.0.2.50 | | +# | | group 233.252.0.1 dev v2$h2 | | | | group 233.252.0.1 dev v2$h3 | | +# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | | +# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | | +# | | | | | | | | +# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | | +# | | local 2001:db8:2::2 | | | | local 2001:db8:3::2 | | +# | | group ff0e::1:2:3 dev v2$h2 | | | | group ff0e::1:2:3 dev v2$h3 | | +# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | | +# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | | +# | | | | | | | | +# | | + w1 (veth) | | | | + w1 (veth) | | +# | | | vid 10 20 | | | | | vid 10 20 | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | | | | | +# | +--|-----------------------------+ | | +--|-----------------------------+ | +# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | | +# | | |\ | | | | |\ | | +# | | | + w2.10 | | | | | + w2.10 | | +# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | | +# | | | | | | | | | | +# | | + w2.20 | | | | + w2.20 | | +# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | | +# | +--------------------------------+ | | +--------------------------------+ | +# 
+------------------------------------+ +------------------------------------+ +# +#shellcheck disable=SC2317 # SC doesn't see our uses of functions. + +: "${VXPORT:=4789}" +export VXPORT + +: "${GROUP4:=233.252.0.1}" +export GROUP4 + +: "${GROUP6:=ff0e::1:2:3}" +export GROUP6 + +: "${IPMR:=lo10}" + +ALL_TESTS=" + ipv4_nomcroute + ipv4_mcroute + ipv4_mcroute_changelink + ipv4_mcroute_starg + ipv4_mcroute_noroute + ipv4_mcroute_fdb + ipv4_mcroute_fdb_oif0 + ipv4_mcroute_fdb_oif0_sep + + ipv6_nomcroute + ipv6_mcroute + ipv6_mcroute_changelink + ipv6_mcroute_starg + ipv6_mcroute_noroute + ipv6_mcroute_fdb + ipv6_mcroute_fdb_oif0 + + ipv4_nomcroute_rx + ipv4_mcroute_rx + ipv4_mcroute_starg_rx + ipv4_mcroute_fdb_oif0_sep_rx + ipv4_mcroute_fdb_sep_rx + + ipv6_nomcroute_rx + ipv6_mcroute_rx + ipv6_mcroute_starg_rx + ipv6_mcroute_fdb_sep_rx +" + +NUM_NETIFS=6 +source lib.sh + +h1_create() +{ + adf_simple_if_init "$h1" + + adf_ip_link_add "$h1.10" master "v$h1" link "$h1" type vlan id 10 + adf_ip_link_set_up "$h1.10" + adf_ip_addr_add "$h1.10" 192.0.2.1/28 + + adf_ip_link_add "$h1.20" master "v$h1" link "$h1" type vlan id 20 + adf_ip_link_set_up "$h1.20" + adf_ip_addr_add "$h1.20" 2001:db8:1::1/64 +} + +install_capture() +{ + local dev=$1; shift + + tc qdisc add dev "$dev" clsact + defer tc qdisc del dev "$dev" clsact + + tc filter add dev "$dev" ingress proto ip pref 104 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ip pref 104 + + tc filter add dev "$dev" ingress proto ipv6 pref 106 \ + flower skip_hw ip_proto udp dst_port "$VXPORT" \ + action pass + defer tc filter del dev "$dev" ingress proto ipv6 pref 106 +} + +h2_create() +{ + # $h2 + adf_ip_link_set_up "$h2" + + # H2 + vrf_create "v$h2" + defer vrf_destroy "v$h2" + + adf_ip_link_set_up "v$h2" + + # br2 + adf_ip_link_add br2 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br2 "v$h2" + adf_ip_link_set_up br2 + + # $h2 + 
adf_ip_link_set_master "$h2" br2 + install_capture "$h2" + + # v1$h2 + adf_ip_link_set_up "v1$h2" + adf_ip_link_set_master "v1$h2" br2 +} + +h3_create() +{ + # $h3 + adf_ip_link_set_up "$h3" + + # H3 + vrf_create "v$h3" + defer vrf_destroy "v$h3" + + adf_ip_link_set_up "v$h3" + + # br3 + adf_ip_link_add br3 type bridge vlan_filtering 0 mcast_snooping 0 + adf_ip_link_set_master br3 "v$h3" + adf_ip_link_set_up br3 + + # $h3 + adf_ip_link_set_master "$h3" br3 + install_capture "$h3" + + # v1$h3 + adf_ip_link_set_up "v1$h3" + adf_ip_link_set_master "v1$h3" br3 +} + +switch_create() +{ + local swp1_mac + + # br1 + swp1_mac=$(mac_get "$swp1") + adf_ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + adf_ip_link_set_addr br1 "$swp1_mac" + adf_ip_link_set_up br1 + + # A dummy to force the IPv6 OIF=0 test to install a suitable MC route on + # $IPMR to be deterministic. Also used for the IPv6 RX!=TX ping test. + adf_ip_link_add "X$IPMR" up type dummy + + # IPMR + adf_ip_link_add "$IPMR" up type dummy + adf_ip_addr_add "$IPMR" 192.0.2.100/28 + adf_ip_addr_add "$IPMR" 2001:db8:4::1/64 + + # $swp1 + adf_ip_link_set_up "$swp1" + adf_ip_link_set_master "$swp1" br1 + adf_bridge_vlan_add vid 10 dev "$swp1" + adf_bridge_vlan_add vid 20 dev "$swp1" + + # $swp2 + adf_ip_link_set_up "$swp2" + adf_ip_addr_add "$swp2" 192.0.2.33/28 + adf_ip_addr_add "$swp2" 2001:db8:2::1/64 + + # $swp3 + adf_ip_link_set_up "$swp3" + adf_ip_addr_add "$swp3" 192.0.2.65/28 + adf_ip_addr_add "$swp3" 2001:db8:3::1/64 +} + +vx_create() +{ + local name=$1; shift + local vid=$1; shift + + adf_ip_link_add "$name" up type vxlan dstport "$VXPORT" \ + nolearning noudpcsum tos inherit ttl 16 \ + "$@" + adf_ip_link_set_master "$name" br1 + adf_bridge_vlan_add vid "$vid" dev "$name" pvid untagged +} +export -f vx_create + +vx_wait() +{ + # Wait for all the ARP, IGMP etc. noise to settle down so that the + # tunnel is clear for measurements. 
+ sleep 10 +} + +vx10_create() +{ + vx_create vx10 10 id 1000 "$@" +} +export -f vx10_create + +vx20_create() +{ + vx_create vx20 20 id 2000 "$@" +} +export -f vx20_create + +vx10_create_wait() +{ + vx10_create "$@" + vx_wait +} + +vx20_create_wait() +{ + vx20_create "$@" + vx_wait +} + +ns_init_common() +{ + local ns=$1; shift + local if_in=$1; shift + local ipv4_in=$1; shift + local ipv6_in=$1; shift + local ipv4_host=$1; shift + local ipv6_host=$1; shift + + # v2$h2 / v2$h3 + adf_ip_link_set_up "$if_in" + adf_ip_addr_add "$if_in" "$ipv4_in" + adf_ip_addr_add "$if_in" "$ipv6_in" + + # br1 + adf_ip_link_add br1 type bridge vlan_filtering 1 \ + vlan_default_pvid 0 mcast_snooping 0 + adf_ip_link_set_up br1 + + # vx10, vx20 + vx10_create local "${ipv4_in%/*}" group "$GROUP4" dev "$if_in" + vx20_create local "${ipv6_in%/*}" group "$GROUP6" dev "$if_in" + + # w1 + adf_ip_link_add w1 type veth peer name w2 + adf_ip_link_set_master w1 br1 + adf_ip_link_set_up w1 + adf_bridge_vlan_add vid 10 dev w1 + adf_bridge_vlan_add vid 20 dev w1 + + # w2 + adf_simple_if_init w2 + + # w2.10 + adf_ip_link_add w2.10 master vw2 link w2 type vlan id 10 + adf_ip_link_set_up w2.10 + adf_ip_addr_add w2.10 "$ipv4_host" + + # w2.20 + adf_ip_link_add w2.20 master vw2 link w2 type vlan id 20 + adf_ip_link_set_up w2.20 + adf_ip_addr_add w2.20 "$ipv6_host" +} +export -f ns_init_common + +ns2_create() +{ + # NS2 + ip netns add ns2 + defer ip netns del ns2 + + # v2$h2 + ip link set dev "v2$h2" netns ns2 + defer ip -n ns2 link set dev "v2$h2" netns 1 + + in_ns ns2 \ + ns_init_common ns2 "v2$h2" \ + 192.0.2.34/28 2001:db8:2::2/64 \ + 192.0.2.3/28 2001:db8:1::3/64 +} + +ns3_create() +{ + # NS3 + ip netns add ns3 + defer ip netns del ns3 + + # v2$h3 + ip link set dev "v2$h3" netns ns3 + defer ip -n ns3 link set dev "v2$h3" netns 1 + + ip -n ns3 link set dev "v2$h3" up + + in_ns ns3 \ + ns_init_common ns3 "v2$h3" \ + 192.0.2.66/28 2001:db8:3::2/64 \ + 192.0.2.4/28 2001:db8:1::4/64 +} + +setup_prepare() 
# Install the receive-side multicast routes: traffic for $GROUP4 / $GROUP6 with
# a wildcard source (0.0.0.0 / ::) entering through $swp2 or $swp3 is routed to
# $IPMR. Each add is paired with a deferred removal that names the same
# interface, source and group, so the cleanup follows $IPMR when it is
# overridden instead of assuming the default "lo10".
# NOTE(review): mc_cli arguments are presumably
# <inbound-if> <source> <group> <outbound-if>...; confirm against lib.sh.
adf_install_rx()
{
	mc_cli add "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"
	defer mc_cli remove "$swp2" 0.0.0.0 "$GROUP4" "$IPMR"

	mc_cli add "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"
	defer mc_cli remove "$swp3" 0.0.0.0 "$GROUP4" "$IPMR"

	mc_cli add "$swp2" :: "$GROUP6" "$IPMR"
	defer mc_cli remove "$swp2" :: "$GROUP6" "$IPMR"

	mc_cli add "$swp3" :: "$GROUP6" "$IPMR"
	defer mc_cli remove "$swp3" :: "$GROUP6" "$IPMR"
}

# Start the multicast routing daemon on $IPMR and install the (S,G) routes that
# send VXLAN traffic sourced from the tunnel's local addresses (192.0.2.100 and
# 2001:db8:4::1) out of $swp2 and $swp3, then install the receive-side routes.
# Exits the script with $EXIT_STATUS if the daemon cannot be started.
adf_install_sg()
{
	adf_mcd_start "$IPMR" || exit "$EXIT_STATUS"

	mc_cli add "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"
	# The removal must name the same source as the add above; it previously
	# used 192.0.2.33, which does not match the installed route.
	defer mc_cli remove "$IPMR" 192.0.2.100 "$GROUP4" "$swp2" "$swp3"

	mc_cli add "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"
	defer mc_cli remove "$IPMR" 2001:db8:4::1 "$GROUP6" "$swp2" "$swp3"

	adf_install_rx
}
"$swp2" "$swp3" + + mc_cli add "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$lo" 2001:db8:5::1 "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +adf_install_starg() +{ + adf_mcd_start "$IPMR" || exit "$EXIT_STATUS" + + mc_cli add "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" 0.0.0.0 "$GROUP4" "$swp2" "$swp3" + + mc_cli add "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + defer mc_cli remove "$IPMR" :: "$GROUP6" "$swp2" "$swp3" + + adf_install_rx +} + +do_packets_v4() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" "$h1" -Q 10 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 192.0.2.1 -B 192.0.2.2 -t udp sp=1234,dp=2345 -q +} + +do_packets_v6() +{ + local mac + + mac=$(mac_get "$h2") + "$MZ" -6 "$h1" -Q 20 -c 10 -d 100msec -p 64 -a own -b "$mac" \ + -A 2001:db8:1::1 -B 2001:db8:1::2 -t udp sp=1234,dp=2345 -q +} + +do_test() +{ + local ipv=$1; shift + local expect_h2=$1; shift + local expect_h3=$1; shift + local what=$1; shift + + local pref=$((100 + ipv)) + local t0_h2 + local t0_h3 + local t1_h2 + local t1_h3 + local d_h2 + local d_h3 + + RET=0 + + t0_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t0_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + "do_packets_v$ipv" + sleep 1 + + t1_h2=$(tc_rule_stats_get "$h2" "$pref" ingress) + t1_h3=$(tc_rule_stats_get "$h3" "$pref" ingress) + + d_h2=$((t1_h2 - t0_h2)) + d_h3=$((t1_h3 - t0_h3)) + + ((d_h2 == expect_h2)) + check_err $? "Expected $expect_h2 packets on H2, got $d_h2" + + ((d_h3 == expect_h3)) + check_err $? "Expected $expect_h3 packets on H3, got $d_h3" + + log_test "VXLAN MC flood $what" +} + +ipv4_do_test_rx() +{ + local h3_should_fail=$1; shift + local what=$1; shift + + RET=0 + + ping_do "$h1.10" 192.0.2.3 + check_err $? "H2 should respond" + + ping_do "$h1.10" 192.0.2.4 + check_err_fail "$h3_should_fail" $? 
# Verify that multicast routing can be enabled on an existing IPv6-underlay
# VXLAN device through changelink: vx20 is created WITHOUT the mcroute flag, the
# flag is then turned on with "ip link set", and flooded traffic is expected to
# reach both H2 and H3 (10 packets each). This mirrors ipv4_mcroute_changelink.
ipv6_mcroute_changelink()
{
	adf_install_sg
	# Deliberately no "mcroute" here: if the device were created with the
	# flag already set, the changelink below would be a no-op and the test
	# would only repeat ipv6_mcroute.
	vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR"
	ip link set dev vx20 type vxlan mcroute
	# Brief pause after the changelink before measuring.
	sleep 1
	do_test 6 10 10 "IPv6 mcroute changelink"
}
Instead it just does a FIB match, and that would find one of + # the several ff00::/8 multicast routes -- each device has one. In order + # to reliably force the $IPMR device, add a /128 route for the + # destination group address. + ip -6 route add table local multicast "$GROUP6/128" dev "$IPMR" + defer ip -6 route del table local multicast "$GROUP6/128" dev "$IPMR" + + adf_install_sg + vx20_create_wait local 2001:db8:4::1 group "$GROUP6" dev "$IPMR" mcroute + bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 self static dst "$GROUP6" + do_test 6 10 10 "IPv6 mcroute oif=0" +} + +# In oif=0 test as above, have FIB lookup resolve to loopback instead of IPMR. +# This doesn't work with IPv6 -- a MC route on lo would be marked as RTF_REJECT. +ipv4_mcroute_fdb_oif0_sep() +{ + adf_install_sg_sep + + adf_ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + do_test 4 10 10 "IPv4 mcroute TX!=RX oif=0" +} + +ipv4_mcroute_fdb_oif0_sep_rx() +{ + adf_install_sg_sep_rx lo + + adf_ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX oif=0 ping" +} + +ipv4_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx lo + + adf_ip_addr_add lo 192.0.2.120/28 + vx10_create_wait local 192.0.2.120 group "$GROUP4" dev "$IPMR" mcroute + bridge fdb del dev vx10 00:00:00:00:00:00 + bridge fdb add \ + dev vx10 00:00:00:00:00:00 self static dst "$GROUP4" via lo + ipv4_do_test_rx 0 "IPv4 mcroute TX!=RX ping" +} + +ipv6_mcroute_fdb_sep_rx() +{ + adf_install_sg_sep_rx "X$IPMR" + + adf_ip_addr_add "X$IPMR" 2001:db8:5::1/64 + vx20_create_wait local 2001:db8:5::1 group "$GROUP6" dev "$IPMR" mcroute + 
bridge -6 fdb del dev vx20 00:00:00:00:00:00 + bridge -6 fdb add dev vx20 00:00:00:00:00:00 \ + self static dst "$GROUP6" via "X$IPMR" + ipv6_do_test_rx 0 "IPv6 mcroute TX!=RX ping" +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh new file mode 100755 index 000000000000..709845123727 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh @@ -0,0 +1,347 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +--------------------+ +# | H1 (vrf) | +# | + $h1 | +# | | 192.0.2.1/28 | +# +----|---------------+ +# | +# +----|--------------------------------+ +# | SW | | +# | +--|------------------------------+ | +# | | + $swp1 BR1 (802.1d) | | +# | | | | +# | | + vx1 (vxlan) | | +# | | local 192.0.2.17 | | +# | | id 1000 dstport $VXPORT | | +# | +---------------------------------+ | +# | | +# | 192.0.2.32/28 via 192.0.2.18 | +# | | +# | + $rp1 | +# | | 192.0.2.17/28 | +# +--|----------------------------------+ +# | +# +--|----------------------------------+ +# | | | +# | + $rp2 | +# | 192.0.2.18/28 | +# | | +# | VRP2 (vrf) | +# +-------------------------------------+ + +: ${VXPORT:=4789} +: ${ALL_TESTS:=" + default_test + plain_test + reserved_0_test + reserved_10_test + reserved_31_test + reserved_56_test + reserved_63_test + "} + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + adf_simple_if_init $h1 192.0.2.1/28 + + tc qdisc add dev $h1 clsact + defer tc qdisc del dev $h1 clsact + + tc filter add dev $h1 ingress pref 77 \ + prot ip flower skip_hw ip_proto icmp action drop + defer tc filter del dev $h1 ingress pref 77 +} + +switch_create() +{ + adf_ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 + # Make sure the bridge uses the MAC address of the local port and not + # that of the VxLAN's device. 
# Print the 8-byte VXLAN header as colon-separated hex octets.
#
# $1     - VNI, placed in header bits 32..55 (most significant bit first)
# $2...  - additional header bit positions to set (bit 0 is the MSB of the
#          first octet)
#
# Bit 4 (the I flag) is always set. The VNI is written after the extra bits, so
# an extra bit inside 32..55 is overwritten by the VNI value.
vxlan_header_bytes()
{
	local vni=$1; shift
	local -a extra=("$@")
	local -a hdr_bits
	local pos

	# Start from an all-zero 64-bit header.
	for pos in {0..63}; do
		hdr_bits[pos]=0
	done

	# Mandatory I flag.
	hdr_bits[4]=1

	# Caller-requested bits.
	for pos in ${extra[@]}; do
		hdr_bits[pos]=1
	done

	# VNI occupies bits 32..55, MSB first.
	for ((pos = 0; pos < 24; pos++)); do
		hdr_bits[32 + pos]=$(( (vni >> (23 - pos)) & 1 ))
	done

	# Fold each group of eight bits into one octet and append it as hex.
	local out=
	local octet
	local base
	for ((base = 0; base < 64; base += 8)); do
		octet=0
		for ((pos = 0; pos < 8; pos++)); do
			((octet = (octet << 1) | hdr_bits[base + pos]))
		done
		printf -v out '%s%02x:' "$out" "$octet"
	done

	# Drop the trailing separator.
	echo ${out%:}
}
# Send one encapsulated ping per reserved VXLAN header bit, each with exactly
# that bit set in addition to the regular header (VNI 1000).
#
# Returns 0 when every packet was injected, otherwise the status of the last
# failing vxlan_ping_do call. The caller (vxlan_ping_test) checks this status,
# so it must reflect the injections rather than an unrelated arithmetic
# expression.
vxlan_ping_reserved()
{
	local bit
	local rc=0

	for bit in $(vxlan_all_reserved_bits); do
		# Keep going after a failure so the remaining bits are still
		# exercised, but remember that something went wrong.
		vxlan_ping_do 1 $rp2 $(mac_get $rp1) \
			      192.0.2.17 $(mac_get $h1) 1000 "$bit" || rc=$?
	done

	return $rc
}
+ + log_test "$what" +} + +__default_test_do() +{ + local n_allowed_bits=$1; shift + local what=$1; shift + + vxlan_ping_test "$what: clean packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + 10 vxlan_ping_vanilla + + local t0=$(link_stats_get vx1 rx errors) + vxlan_ping_test "$what: mangled packets" \ + "tc_rule_stats_get $h1 77 ingress" \ + $n_allowed_bits vxlan_ping_reserved + local t1=$(link_stats_get vx1 rx errors) + + RET=0 + local expect=$((39 - n_allowed_bits)) + local delta=$((t1 - t0)) + ((expect == delta)) + check_err $? "Expected $expect error packets, got $delta." + log_test "$what: drops reported" +} + +default_test_do() +{ + vxlan_device_add + __default_test_do 0 "Default" +} + +default_test() +{ + in_defer_scope \ + default_test_do +} + +plain_test_do() +{ + vxlan_device_add reserved_bits 0xf7ffffff000000ff + __default_test_do 0 "reserved_bits 0xf7ffffff000000ff" +} + +plain_test() +{ + in_defer_scope \ + plain_test_do +} + +reserved_test() +{ + local bit=$1; shift + + local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit) + local reserved_bytes=$(neg_bytes $allowed_bytes) + local reserved_bits=${reserved_bytes//:/} + + vxlan_device_add reserved_bits 0x$reserved_bits + __default_test_do 1 "reserved_bits 0x$reserved_bits" +} + +reserved_0_test() +{ + in_defer_scope \ + reserved_test 0 +} + +reserved_10_test() +{ + in_defer_scope \ + reserved_test 10 +} + +reserved_31_test() +{ + in_defer_scope \ + reserved_test 31 +} + +reserved_56_test() +{ + in_defer_scope \ + reserved_test 56 +} + +reserved_63_test() +{ + in_defer_scope \ + reserved_test 63 +} + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/gre_ipv6_lladdr.sh b/tools/testing/selftests/net/gre_ipv6_lladdr.sh new file mode 100755 index 000000000000..48eb999a3120 --- /dev/null +++ b/tools/testing/selftests/net/gre_ipv6_lladdr.sh @@ -0,0 +1,184 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ./lib.sh + 
+PAUSE_ON_FAIL="no" + +# The trap function handler +# +exit_cleanup_all() +{ + cleanup_all_ns + + exit "${EXIT_STATUS}" +} + +# Add fake IPv4 and IPv6 networks on the loopback device, to be used as +# underlay by future GRE devices. +# +setup_basenet() +{ + ip -netns "${NS0}" link set dev lo up + ip -netns "${NS0}" address add dev lo 192.0.2.10/24 + ip -netns "${NS0}" address add dev lo 2001:db8::10/64 nodad +} + +# Check the IPv6 configuration of a network device. +# +# We currently check the generation of the link-local IPv6 address and the +# creation of the ff00::/8 multicast route. +# +# Parameters: +# +# * $1: The network device to test +# * $2: An extra regular expression that should be matched (to verify the +# presence of extra attributes) +# * $3: The expected return code from grep (to allow checking the absence of +# a link-local address) +# * $4: The user visible name for the scenario being tested +# +check_ipv6_device_config() +{ + local DEV="$1" + local EXTRA_MATCH="$2" + local XRET="$3" + local MSG="$4" + + RET=0 + set +e + ip -netns "${NS0}" -6 address show dev "${DEV}" scope link | grep "fe80::" | grep -q "${EXTRA_MATCH}" + check_err_fail "${XRET}" $? "IPv6 link-local address generation" + + ip -netns "${NS0}" -6 route show table local type multicast ff00::/8 proto kernel | grep -q "${DEV}" + check_err_fail 0 $? "IPv6 multicast route creation" + + log_test "${MSG}" + set -e +} + +# Create a GRE device and verify that it gets an IPv6 link-local address as +# expected. +# +# Parameters: +# +# * $1: The device type (gre, ip6gre, gretap or ip6gretap) +# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any") +# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any") +# * $4: The IPv6 interface identifier generation mode to use for the GRE +# device (eui64, none, stable-privacy or random). 
# Create a GRE device named "gretest" in ${NS0} and verify its IPv6
# configuration twice: once when the device is brought up with the requested
# address generation mode, and once when that mode is restored while the device
# is already up (after bringing it up with addr_gen_mode=1).
#
# Parameters:
#
# * $1: The device type (gre, ip6gre, gretap or ip6gretap)
# * $2: The local underlay IP address (can be an IPv4, an IPv6 or "any")
# * $3: The remote underlay IP address (can be an IPv4, an IPv6 or "any")
# * $4: The IPv6 interface identifier generation mode to use for the GRE
#       device (eui64, none, stable-privacy or random).
#
# Returns non-zero (after removing the device) if $4 is not a known mode.
#
test_gre_device()
{
	local GRE_TYPE="$1"
	local LOCAL_IP="$2"
	local REMOTE_IP="$3"
	local MODE="$4"
	local ADDR_GEN_MODE
	local MATCH_REGEXP
	local MSG
	# Expected grep status for the link-local address check; kept local so
	# it does not leak into the caller's scope.
	local XRET

	ip link add netns "${NS0}" name gretest type "${GRE_TYPE}" local "${LOCAL_IP}" remote "${REMOTE_IP}"

	case "${MODE}" in
	    "eui64")
		ADDR_GEN_MODE=0
		MATCH_REGEXP=""
		MSG="${GRE_TYPE}, mode: 0 (EUI64), ${LOCAL_IP} -> ${REMOTE_IP}"
		XRET=0
		;;
	    "none")
		ADDR_GEN_MODE=1
		MATCH_REGEXP=""
		MSG="${GRE_TYPE}, mode: 1 (none), ${LOCAL_IP} -> ${REMOTE_IP}"
		XRET=1 # No link-local address should be generated
		;;
	    "stable-privacy")
		ADDR_GEN_MODE=2
		MATCH_REGEXP="stable-privacy"
		MSG="${GRE_TYPE}, mode: 2 (stable privacy), ${LOCAL_IP} -> ${REMOTE_IP}"
		XRET=0
		# Initialise stable_secret (required for stable-privacy mode)
		ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.stable_secret="2001:db8::abcd"
		;;
	    "random")
		ADDR_GEN_MODE=3
		MATCH_REGEXP="stable-privacy"
		MSG="${GRE_TYPE}, mode: 3 (random), ${LOCAL_IP} -> ${REMOTE_IP}"
		XRET=0
		;;
	    *)
		# Without this branch an unknown mode would continue with
		# ADDR_GEN_MODE, MSG and XRET unset.
		echo "test_gre_device: unknown address generation mode '${MODE}'" >&2
		ip -netns "${NS0}" link del dev gretest
		return 1
		;;
	esac

	# Check the IPv6 device configuration when it goes up
	ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
	ip -netns "${NS0}" link set dev gretest up
	check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "config: ${MSG}"

	# Now disable link-local address generation
	ip -netns "${NS0}" link set dev gretest down
	ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode=1
	ip -netns "${NS0}" link set dev gretest up

	# Check the IPv6 device configuration when link-local address
	# generation is re-enabled while the device is already up
	ip netns exec "${NS0}" sysctl -qw net.ipv6.conf.gretest.addr_gen_mode="${ADDR_GEN_MODE}"
	check_ipv6_device_config gretest "${MATCH_REGEXP}" "${XRET}" "update: ${MSG}"

	ip -netns "${NS0}" link del dev gretest
}
configuration of ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 192.0.2.10 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 192.0.2.11 "${MODE}" + test_gre_device "${GRE_TYPE}" 192.0.2.10 any "${MODE}" + done + done +} + +test_gre6() +{ + local GRE_TYPE + local MODE + + for GRE_TYPE in "ip6gre" "ip6gretap"; do + printf "\n####\nTesting IPv6 configuration of ${GRE_TYPE} devices\n####\n\n" + + for MODE in "eui64" "none" "stable-privacy" "random"; do + test_gre_device "${GRE_TYPE}" 2001:db8::10 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" any 2001:db8::11 "${MODE}" + test_gre_device "${GRE_TYPE}" 2001:db8::10 any "${MODE}" + done + done +} + +usage() +{ + echo "Usage: $0 [-p]" + exit 1 +} + +while getopts :p o +do + case $o in + p) PAUSE_ON_FAIL="yes";; + *) usage;; + esac +done + +setup_ns NS0 + +set -e +trap exit_cleanup_all EXIT + +setup_basenet + +test_gre4 +test_gre6 diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c deleted file mode 100644 index b2184847e388..000000000000 --- a/tools/testing/selftests/net/gro.c +++ /dev/null @@ -1,1328 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * This testsuite provides conformance testing for GRO coalescing. - * - * Test cases: - * 1.data - * Data packets of the same size and same header setup with correct - * sequence numbers coalesce. The one exception being the last data - * packet coalesced: it can be smaller than the rest and coalesced - * as long as it is in the same flow. - * 2.ack - * Pure ACK does not coalesce. - * 3.flags - * Specific test cases: no packets with PSH, SYN, URG, RST set will - * be coalesced. - * 4.tcp - * Packets with incorrect checksum, non-consecutive seqno and - * different TCP header options shouldn't coalesce. Nit: given that - * some extension headers have paddings, such as timestamp, headers - * that are padding differently would not be coalesced. 
- * 5.ip: - * Packets with different (ECN, TTL, TOS) header, ip options or - * ip fragments (ipv6) shouldn't coalesce. - * 6.large: - * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce. - * - * MSS is defined as 4096 - header because if it is too small - * (i.e. 1500 MTU - header), it will result in many packets, - * increasing the "large" test case's flakiness. This is because - * due to time sensitivity in the coalescing window, the receiver - * may not coalesce all of the packets. - * - * Note the timing issue applies to all of the test cases, so some - * flakiness is to be expected. - * - */ - -#define _GNU_SOURCE - -#include <arpa/inet.h> -#include <errno.h> -#include <error.h> -#include <getopt.h> -#include <linux/filter.h> -#include <linux/if_packet.h> -#include <linux/ipv6.h> -#include <net/ethernet.h> -#include <net/if.h> -#include <netinet/in.h> -#include <netinet/ip.h> -#include <netinet/ip6.h> -#include <netinet/tcp.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdio.h> -#include <stdarg.h> -#include <string.h> -#include <unistd.h> - -#include "../kselftest.h" - -#define DPORT 8000 -#define SPORT 1500 -#define PAYLOAD_LEN 100 -#define NUM_PACKETS 4 -#define START_SEQ 100 -#define START_ACK 100 -#define ETH_P_NONE 0 -#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) -#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) -#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr)) -#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS) -#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr)) -#define MIN_EXTHDR_SIZE 8 -#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" -#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" - -#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */ -#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) - -static const char *addr6_src = "fdaa::2"; -static const char *addr6_dst = 
"fdaa::1"; -static const char *addr4_src = "192.168.1.200"; -static const char *addr4_dst = "192.168.1.100"; -static int proto = -1; -static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN]; -static char *testname = "data"; -static char *ifname = "eth0"; -static char *smac = "aa:00:00:00:00:02"; -static char *dmac = "aa:00:00:00:00:01"; -static bool verbose; -static bool tx_socket = true; -static int tcp_offset = -1; -static int total_hdr_len = -1; -static int ethhdr_proto = -1; -static const int num_flush_id_cases = 6; - -static void vlog(const char *fmt, ...) -{ - va_list args; - - if (verbose) { - va_start(args, fmt); - vfprintf(stderr, fmt, args); - va_end(args); - } -} - -static void setup_sock_filter(int fd) -{ - const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); - const int ethproto_off = offsetof(struct ethhdr, h_proto); - int optlen = 0; - int ipproto_off, opt_ipproto_off; - int next_off; - - if (proto == PF_INET) - next_off = offsetof(struct iphdr, protocol); - else - next_off = offsetof(struct ipv6hdr, nexthdr); - ipproto_off = ETH_HLEN + next_off; - - /* Overridden later if exthdrs are used: */ - opt_ipproto_off = ipproto_off; - - if (strcmp(testname, "ip") == 0) { - if (proto == PF_INET) - optlen = sizeof(struct ip_timestamp); - else { - BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE); - BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE); - BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE); - - /* same size for HBH and Fragment extension header types */ - optlen = MIN_EXTHDR_SIZE; - opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr) - + offsetof(struct ip6_ext, ip6e_nxt); - } - } - - /* this filter validates the following: - * - packet is IPv4/IPv6 according to the running test. - * - packet is TCP. Also handles the case of one extension header and then TCP. - * - checks the packet tcp dport equals to DPORT. Also handles the case of one - * extension header and then TCP. 
- */ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9), - BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0), - BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5), - BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0), - BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1), - BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF), - BPF_STMT(BPF_RET + BPF_K, 0), - }; - - struct sock_fprog bpf = { - .len = ARRAY_SIZE(filter), - .filter = filter, - }; - - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0) - error(1, errno, "error setting filter"); -} - -static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum) -{ - uint16_t *words = data; - int i; - - for (i = 0; i < len / 2; i++) - sum += words[i]; - if (len & 1) - sum += ((char *)data)[len - 1]; - return sum; -} - -static uint16_t checksum_fold(void *data, size_t len, uint32_t sum) -{ - sum = checksum_nofold(data, len, sum); - while (sum > 0xFFFF) - sum = (sum & 0xFFFF) + (sum >> 16); - return ~sum; -} - -static uint16_t tcp_checksum(void *buf, int payload_len) -{ - struct pseudo_header6 { - struct in6_addr saddr; - struct in6_addr daddr; - uint16_t protocol; - uint16_t payload_len; - } ph6; - struct pseudo_header4 { - struct in_addr saddr; - struct in_addr daddr; - uint16_t protocol; - uint16_t payload_len; - } ph4; - uint32_t sum = 0; - - if (proto == PF_INET6) { - if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1) - error(1, errno, "inet_pton6 source ip pseudo"); - if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1) - error(1, errno, "inet_pton6 dest ip pseudo"); - ph6.protocol = htons(IPPROTO_TCP); - ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len); - - sum = 
checksum_nofold(&ph6, sizeof(ph6), 0); - } else if (proto == PF_INET) { - if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1) - error(1, errno, "inet_pton source ip pseudo"); - if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1) - error(1, errno, "inet_pton dest ip pseudo"); - ph4.protocol = htons(IPPROTO_TCP); - ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len); - - sum = checksum_nofold(&ph4, sizeof(ph4), 0); - } - - return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum); -} - -static void read_MAC(uint8_t *mac_addr, char *mac) -{ - if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx", - &mac_addr[0], &mac_addr[1], &mac_addr[2], - &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6) - error(1, 0, "sscanf"); -} - -static void fill_datalinklayer(void *buf) -{ - struct ethhdr *eth = buf; - - memcpy(eth->h_dest, dst_mac, ETH_ALEN); - memcpy(eth->h_source, src_mac, ETH_ALEN); - eth->h_proto = ethhdr_proto; -} - -static void fill_networklayer(void *buf, int payload_len) -{ - struct ipv6hdr *ip6h = buf; - struct iphdr *iph = buf; - - if (proto == PF_INET6) { - memset(ip6h, 0, sizeof(*ip6h)); - - ip6h->version = 6; - ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len); - ip6h->nexthdr = IPPROTO_TCP; - ip6h->hop_limit = 8; - if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1) - error(1, errno, "inet_pton source ip6"); - if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1) - error(1, errno, "inet_pton dest ip6"); - } else if (proto == PF_INET) { - memset(iph, 0, sizeof(*iph)); - - iph->version = 4; - iph->ihl = 5; - iph->ttl = 8; - iph->protocol = IPPROTO_TCP; - iph->tot_len = htons(sizeof(struct tcphdr) + - payload_len + sizeof(struct iphdr)); - iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */ - if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1) - error(1, errno, "inet_pton source ip"); - if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1) - error(1, errno, "inet_pton dest ip"); - iph->check = checksum_fold(buf, sizeof(struct iphdr), 
0); - } -} - -static void fill_transportlayer(void *buf, int seq_offset, int ack_offset, - int payload_len, int fin) -{ - struct tcphdr *tcph = buf; - - memset(tcph, 0, sizeof(*tcph)); - - tcph->source = htons(SPORT); - tcph->dest = htons(DPORT); - tcph->seq = ntohl(START_SEQ + seq_offset); - tcph->ack_seq = ntohl(START_ACK + ack_offset); - tcph->ack = 1; - tcph->fin = fin; - tcph->doff = 5; - tcph->window = htons(TCP_MAXWIN); - tcph->urg_ptr = 0; - tcph->check = tcp_checksum(tcph, payload_len); -} - -static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr) -{ - int ret = -1; - - ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr)); - if (ret == -1) - error(1, errno, "sendto failure"); - if (ret != len) - error(1, errno, "sendto wrong length"); -} - -static void create_packet(void *buf, int seq_offset, int ack_offset, - int payload_len, int fin) -{ - memset(buf, 0, total_hdr_len); - memset(buf + total_hdr_len, 'a', payload_len); - fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset, - payload_len, fin); - fill_networklayer(buf + ETH_HLEN, payload_len); - fill_datalinklayer(buf); -} - -/* send one extra flag, not first and not last pkt */ -static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, - int rst, int urg) -{ - static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - int payload_len, pkt_size, flag, i; - struct tcphdr *tcph; - - payload_len = PAYLOAD_LEN * psh; - pkt_size = total_hdr_len + payload_len; - flag = NUM_PACKETS / 2; - - create_packet(flag_buf, flag * payload_len, 0, payload_len, 0); - - tcph = (struct tcphdr *)(flag_buf + tcp_offset); - tcph->psh = psh; - tcph->syn = syn; - tcph->rst = rst; - tcph->urg = urg; - tcph->check = 0; - tcph->check = tcp_checksum(tcph, payload_len); - - for (i = 0; i < NUM_PACKETS + 1; i++) { - if (i == flag) { - write_packet(fd, flag_buf, pkt_size, daddr); - continue; - } - create_packet(buf, i * PAYLOAD_LEN, 
0, PAYLOAD_LEN, 0); - write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); - } -} - -/* Test for data of same length, smaller than previous - * and of different lengths - */ -static void send_data_pkts(int fd, struct sockaddr_ll *daddr, - int payload_len1, int payload_len2) -{ - static char buf[ETH_HLEN + IP_MAXPACKET]; - - create_packet(buf, 0, 0, payload_len1, 0); - write_packet(fd, buf, total_hdr_len + payload_len1, daddr); - create_packet(buf, payload_len1, 0, payload_len2, 0); - write_packet(fd, buf, total_hdr_len + payload_len2, daddr); -} - -/* If incoming segments make tracked segment length exceed - * legal IP datagram length, do not coalesce - */ -static void send_large(int fd, struct sockaddr_ll *daddr, int remainder) -{ - static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS]; - static char last[TOTAL_HDR_LEN + MSS]; - static char new_seg[TOTAL_HDR_LEN + MSS]; - int i; - - for (i = 0; i < NUM_LARGE_PKT; i++) - create_packet(pkts[i], i * MSS, 0, MSS, 0); - create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0); - create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0); - - for (i = 0; i < NUM_LARGE_PKT; i++) - write_packet(fd, pkts[i], total_hdr_len + MSS, daddr); - write_packet(fd, last, total_hdr_len + remainder, daddr); - write_packet(fd, new_seg, total_hdr_len + remainder, daddr); -} - -/* Pure acks and dup acks don't coalesce */ -static void send_ack(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN]; - - create_packet(buf, 0, 0, 0, 0); - write_packet(fd, buf, total_hdr_len, daddr); - write_packet(fd, buf, total_hdr_len, daddr); - create_packet(buf, 0, 1, 0, 0); - write_packet(fd, buf, total_hdr_len, daddr); -} - -static void recompute_packet(char *buf, char *no_ext, int extlen) -{ - struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); - - memmove(buf, no_ext, total_hdr_len); - memmove(buf + 
total_hdr_len + extlen, - no_ext + total_hdr_len, PAYLOAD_LEN); - - tcphdr->doff = tcphdr->doff + (extlen / 4); - tcphdr->check = 0; - tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen); - if (proto == PF_INET) { - iph->tot_len = htons(ntohs(iph->tot_len) + extlen); - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); - } else { - ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen); - } -} - -static void tcp_write_options(char *buf, int kind, int ts) -{ - struct tcp_option_ts { - uint8_t kind; - uint8_t len; - uint32_t tsval; - uint32_t tsecr; - } *opt_ts = (void *)buf; - struct tcp_option_window { - uint8_t kind; - uint8_t len; - uint8_t shift; - } *opt_window = (void *)buf; - - switch (kind) { - case TCPOPT_NOP: - buf[0] = TCPOPT_NOP; - break; - case TCPOPT_WINDOW: - memset(opt_window, 0, sizeof(struct tcp_option_window)); - opt_window->kind = TCPOPT_WINDOW; - opt_window->len = TCPOLEN_WINDOW; - opt_window->shift = 0; - break; - case TCPOPT_TIMESTAMP: - memset(opt_ts, 0, sizeof(struct tcp_option_ts)); - opt_ts->kind = TCPOPT_TIMESTAMP; - opt_ts->len = TCPOLEN_TIMESTAMP; - opt_ts->tsval = ts; - opt_ts->tsecr = 0; - break; - default: - error(1, 0, "unimplemented TCP option"); - break; - } -} - -/* TCP with options is always a permutation of {TS, NOP, NOP}. - * Implement different orders to verify coalescing stops. 
- */ -static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order) -{ - switch (order) { - case 0: - tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); - tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0); - tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */, - TCPOPT_TIMESTAMP, ts); - break; - case 1: - tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0); - tcp_write_options(buf + total_hdr_len + 1, - TCPOPT_TIMESTAMP, ts); - tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP, - TCPOPT_NOP, 0); - break; - case 2: - tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts); - tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1, - TCPOPT_NOP, 0); - tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2, - TCPOPT_NOP, 0); - break; - default: - error(1, 0, "unknown order"); - break; - } - recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA); -} - -/* Packets with invalid checksum don't coalesce. */ -static void send_changed_checksum(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); - int pkt_size = total_hdr_len + PAYLOAD_LEN; - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - tcph->check = tcph->check - 1; - write_packet(fd, buf, pkt_size, daddr); -} - - /* Packets with non-consecutive sequence number don't coalesce.*/ -static void send_changed_seq(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset); - int pkt_size = total_hdr_len + PAYLOAD_LEN; - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - tcph->seq = ntohl(htonl(tcph->seq) + 1); - tcph->check = 0; - tcph->check = tcp_checksum(tcph, PAYLOAD_LEN); - write_packet(fd, buf, 
pkt_size, daddr); -} - - /* Packet with different timestamp option or different timestamps - * don't coalesce. - */ -static void send_changed_ts(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; - int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt, buf, 0, 0); - write_packet(fd, extpkt, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt, buf, 0, 0); - write_packet(fd, extpkt, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt, buf, 100, 0); - write_packet(fd, extpkt, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt, buf, 100, 1); - write_packet(fd, extpkt, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt, buf, 100, 2); - write_packet(fd, extpkt, pkt_size, daddr); -} - -/* Packet with different tcp options don't coalesce. 
*/ -static void send_diff_opt(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA]; - static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG]; - int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA; - int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG; - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt1, buf, 0, 0); - write_packet(fd, extpkt1, extpkt1_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - add_standard_tcp_options(extpkt1, buf, 0, 0); - write_packet(fd, extpkt1, extpkt1_size, daddr); - - create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); - tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0); - tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0); - recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1); - write_packet(fd, extpkt2, extpkt2_size, daddr); -} - -static void add_ipv4_ts_option(void *buf, void *optpkt) -{ - struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset); - int optlen = sizeof(struct ip_timestamp); - struct iphdr *iph; - - if (optlen % 4) - error(1, 0, "ipv4 timestamp length is not a multiple of 4B"); - - ts->ipt_code = IPOPT_TS; - ts->ipt_len = optlen; - ts->ipt_ptr = 5; - ts->ipt_flg = IPOPT_TS_TSONLY; - - memcpy(optpkt, buf, tcp_offset); - memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset, - sizeof(struct tcphdr) + PAYLOAD_LEN); - - iph = (struct iphdr *)(optpkt + ETH_HLEN); - iph->ihl = 5 + (optlen / 4); - iph->tot_len = htons(ntohs(iph->tot_len) + optlen); - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0); -} - -static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload) -{ - struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset); - struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN); - char *exthdr_payload_start = (char *)(exthdr + 1); - - 
exthdr->hdrlen = 0; - exthdr->nexthdr = IPPROTO_TCP; - - memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr)); - - memcpy(optpkt, buf, tcp_offset); - memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset, - sizeof(struct tcphdr) + PAYLOAD_LEN); - - iph->nexthdr = exthdr_type; - iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE); -} - -static void fix_ip4_checksum(struct iphdr *iph) -{ - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); -} - -static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase) -{ - static char buf1[MAX_HDR_LEN + PAYLOAD_LEN]; - static char buf2[MAX_HDR_LEN + PAYLOAD_LEN]; - static char buf3[MAX_HDR_LEN + PAYLOAD_LEN]; - bool send_three = false; - struct iphdr *iph1; - struct iphdr *iph2; - struct iphdr *iph3; - - iph1 = (struct iphdr *)(buf1 + ETH_HLEN); - iph2 = (struct iphdr *)(buf2 + ETH_HLEN); - iph3 = (struct iphdr *)(buf3 + ETH_HLEN); - - create_packet(buf1, 0, 0, PAYLOAD_LEN, 0); - create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); - - switch (tcase) { - case 0: /* DF=1, Incrementing - should coalesce */ - iph1->frag_off |= htons(IP_DF); - iph1->id = htons(8); - - iph2->frag_off |= htons(IP_DF); - iph2->id = htons(9); - break; - - case 1: /* DF=1, Fixed - should coalesce */ - iph1->frag_off |= htons(IP_DF); - iph1->id = htons(8); - - iph2->frag_off |= htons(IP_DF); - iph2->id = htons(8); - break; - - case 2: /* DF=0, Incrementing - should coalesce */ - iph1->frag_off &= ~htons(IP_DF); - iph1->id = htons(8); - - iph2->frag_off &= ~htons(IP_DF); - iph2->id = htons(9); - break; - - case 3: /* DF=0, Fixed - should not coalesce */ - iph1->frag_off &= ~htons(IP_DF); - iph1->id = htons(8); - - iph2->frag_off &= ~htons(IP_DF); - iph2->id = htons(8); - break; - - case 4: /* DF=1, two packets incrementing, and one fixed - should - * coalesce only the first two packets - */ - iph1->frag_off |= 
htons(IP_DF); - iph1->id = htons(8); - - iph2->frag_off |= htons(IP_DF); - iph2->id = htons(9); - - iph3->frag_off |= htons(IP_DF); - iph3->id = htons(9); - send_three = true; - break; - - case 5: /* DF=1, two packets fixed, and one incrementing - should - * coalesce only the first two packets - */ - iph1->frag_off |= htons(IP_DF); - iph1->id = htons(8); - - iph2->frag_off |= htons(IP_DF); - iph2->id = htons(8); - - iph3->frag_off |= htons(IP_DF); - iph3->id = htons(9); - send_three = true; - break; - } - - fix_ip4_checksum(iph1); - fix_ip4_checksum(iph2); - write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr); - write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr); - - if (send_three) { - fix_ip4_checksum(iph3); - write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr); - } -} - -static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt) -{ - for (int i = 0; i < num_flush_id_cases; i++) { - sleep(1); - send_flush_id_case(fd, daddr, i); - sleep(1); - write_packet(fd, fin_pkt, total_hdr_len, daddr); - } -} - -static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); - write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); - - create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); - write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); -} - -/* IPv4 options shouldn't coalesce */ -static void send_ip_options(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)]; - int optlen = sizeof(struct ip_timestamp); - int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen; - - 
create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); - - create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv4_ts_option(buf, optpkt); - write_packet(fd, optpkt, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr); -} - -/* IPv4 fragments shouldn't coalesce */ -static void send_fragment4(int fd, struct sockaddr_ll *daddr) -{ - static char buf[IP_MAXPACKET]; - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); - int pkt_size = total_hdr_len + PAYLOAD_LEN; - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, pkt_size, daddr); - - /* Once fragmented, packet would retain the total_len. - * Tcp header is prepared as if rest of data is in follow-up frags, - * but follow up frags aren't actually sent. - */ - memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2); - fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0); - fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN); - fill_datalinklayer(buf); - - iph->frag_off = htons(0x6000); // DF = 1, MF = 1 - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); - write_packet(fd, buf, pkt_size, daddr); -} - -/* IPv4 packets with different ttl don't coalesce.*/ -static void send_changed_ttl(int fd, struct sockaddr_ll *daddr) -{ - int pkt_size = total_hdr_len + PAYLOAD_LEN; - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - iph->ttl = 7; - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); - write_packet(fd, buf, pkt_size, daddr); -} - -/* Packets with different tos don't coalesce.*/ -static void send_changed_tos(int fd, struct sockaddr_ll *daddr) -{ - int pkt_size = total_hdr_len + PAYLOAD_LEN; - static char 
buf[MAX_HDR_LEN + PAYLOAD_LEN]; - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - if (proto == PF_INET) { - iph->tos = 1; - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); - } else if (proto == PF_INET6) { - ip6h->priority = 0xf; - } - write_packet(fd, buf, pkt_size, daddr); -} - -/* Packets with different ECN don't coalesce.*/ -static void send_changed_ECN(int fd, struct sockaddr_ll *daddr) -{ - int pkt_size = total_hdr_len + PAYLOAD_LEN; - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN); - - create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, pkt_size, daddr); - - create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); - if (proto == PF_INET) { - buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10 - iph->check = 0; - iph->check = checksum_fold(iph, sizeof(struct iphdr), 0); - } else { - buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10 - } - write_packet(fd, buf, pkt_size, daddr); -} - -/* IPv6 fragments and packets with extensions don't coalesce.*/ -static void send_fragment6(int fd, struct sockaddr_ll *daddr) -{ - static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; - static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN + - sizeof(struct ip6_frag)]; - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN); - struct ip6_frag *frag = (void *)(extpkt + tcp_offset); - int extlen = sizeof(struct ip6_frag); - int bufpkt_len = total_hdr_len + PAYLOAD_LEN; - int extpkt_len = bufpkt_len + extlen; - int i; - - for (i = 0; i < 2; i++) { - create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, bufpkt_len, daddr); - } - sleep(1); - create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0); - memset(extpkt, 0, extpkt_len); - - ip6h->nexthdr = IPPROTO_FRAGMENT; - ip6h->payload_len = 
htons(ntohs(ip6h->payload_len) + extlen); - frag->ip6f_nxt = IPPROTO_TCP; - - memcpy(extpkt, buf, tcp_offset); - memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset, - sizeof(struct tcphdr) + PAYLOAD_LEN); - write_packet(fd, extpkt, extpkt_len, daddr); - - create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0); - write_packet(fd, buf, bufpkt_len, daddr); -} - -static void bind_packetsocket(int fd) -{ - struct sockaddr_ll daddr = {}; - - daddr.sll_family = AF_PACKET; - daddr.sll_protocol = ethhdr_proto; - daddr.sll_ifindex = if_nametoindex(ifname); - if (daddr.sll_ifindex == 0) - error(1, errno, "if_nametoindex"); - - if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0) - error(1, errno, "could not bind socket"); -} - -static void set_timeout(int fd) -{ - struct timeval timeout; - - timeout.tv_sec = 3; - timeout.tv_usec = 0; - if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout, - sizeof(timeout)) < 0) - error(1, errno, "cannot set timeout, setsockopt failed"); -} - -static void check_recv_pkts(int fd, int *correct_payload, - int correct_num_pkts) -{ - static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; - struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); - struct tcphdr *tcph; - bool bad_packet = false; - int tcp_ext_len = 0; - int ip_ext_len = 0; - int pkt_size = -1; - int data_len = 0; - int num_pkt = 0; - int i; - - vlog("Expected {"); - for (i = 0; i < correct_num_pkts; i++) - vlog("%d ", correct_payload[i]); - vlog("}, Total %d packets\nReceived {", correct_num_pkts); - - while (1) { - ip_ext_len = 0; - pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); - if (pkt_size < 0) - error(1, errno, "could not receive"); - - if (iph->version == 4) - ip_ext_len = (iph->ihl - 5) * 4; - else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP) - ip_ext_len = MIN_EXTHDR_SIZE; - - tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len); - - if (tcph->fin) - break; - - tcp_ext_len = 
(tcph->doff - 5) * 4; - data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len; - /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3. - * Ipv4/tcp packets without at least 6 bytes of data will be padded. - * Packet sockets are protocol agnostic, and will not trim the padding. - */ - if (pkt_size == ETH_ZLEN && iph->version == 4) { - data_len = ntohs(iph->tot_len) - - sizeof(struct tcphdr) - sizeof(struct iphdr); - } - vlog("%d ", data_len); - if (data_len != correct_payload[num_pkt]) { - vlog("[!=%d]", correct_payload[num_pkt]); - bad_packet = true; - } - num_pkt++; - } - vlog("}, Total %d packets.\n", num_pkt); - if (num_pkt != correct_num_pkts) - error(1, 0, "incorrect number of packets"); - if (bad_packet) - error(1, 0, "incorrect packet geometry"); - - printf("Test succeeded\n\n"); -} - -static void gro_sender(void) -{ - static char fin_pkt[MAX_HDR_LEN]; - struct sockaddr_ll daddr = {}; - int txfd = -1; - - txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW); - if (txfd < 0) - error(1, errno, "socket creation"); - - memset(&daddr, 0, sizeof(daddr)); - daddr.sll_ifindex = if_nametoindex(ifname); - if (daddr.sll_ifindex == 0) - error(1, errno, "if_nametoindex"); - daddr.sll_family = AF_PACKET; - memcpy(daddr.sll_addr, dst_mac, ETH_ALEN); - daddr.sll_halen = ETH_ALEN; - create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1); - - if (strcmp(testname, "data") == 0) { - send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "ack") == 0) { - send_ack(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "flags") == 0) { - send_flags(txfd, &daddr, 1, 0, 0, 0); - write_packet(txfd, fin_pkt, 
total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 1, 0, 0); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 0, 1, 0); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_flags(txfd, &daddr, 0, 0, 0, 1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "tcp") == 0) { - send_changed_checksum(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_changed_seq(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_changed_ts(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_diff_opt(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else if (strcmp(testname, "ip") == 0) { - send_changed_ECN(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_changed_tos(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - if (proto == PF_INET) { - /* Modified packets may be received out of order. - * Sleep function added to enforce test boundaries - * so that fin pkts are not received prior to other pkts. 
- */ - sleep(1); - send_changed_ttl(txfd, &daddr); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - send_ip_options(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - send_fragment4(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - test_flush_id(txfd, &daddr, fin_pkt); - } else if (proto == PF_INET6) { - sleep(1); - send_fragment6(txfd, &daddr); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - /* send IPv6 packets with ext header with same payload */ - send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - sleep(1); - /* send IPv6 packets with ext header with different payload */ - send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2); - sleep(1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } - } else if (strcmp(testname, "large") == 0) { - /* 20 is the difference between min iphdr size - * and min ipv6hdr size. Like MAX_HDR_SIZE, - * MAX_PAYLOAD is defined with the larger header of the two. - */ - int offset = proto == PF_INET ? 
20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; - - send_large(txfd, &daddr, remainder); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - - send_large(txfd, &daddr, remainder + 1); - write_packet(txfd, fin_pkt, total_hdr_len, &daddr); - } else { - error(1, 0, "Unknown testcase"); - } - - if (close(txfd)) - error(1, errno, "socket close"); -} - -static void gro_receiver(void) -{ - static int correct_payload[NUM_PACKETS]; - int rxfd = -1; - - rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE)); - if (rxfd < 0) - error(1, 0, "socket creation"); - setup_sock_filter(rxfd); - set_timeout(rxfd); - bind_packetsocket(rxfd); - - memset(correct_payload, 0, sizeof(correct_payload)); - - if (strcmp(testname, "data") == 0) { - printf("pure data packet of same size: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("large data packets followed by a smaller one: "); - correct_payload[0] = PAYLOAD_LEN * 1.5; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("small data packets followed by a larger one: "); - correct_payload[0] = PAYLOAD_LEN / 2; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } else if (strcmp(testname, "ack") == 0) { - printf("duplicate ack and pure ack: "); - check_recv_pkts(rxfd, correct_payload, 3); - } else if (strcmp(testname, "flags") == 0) { - correct_payload[0] = PAYLOAD_LEN * 3; - correct_payload[1] = PAYLOAD_LEN * 2; - - printf("psh flag ends coalescing: "); - check_recv_pkts(rxfd, correct_payload, 2); - - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = 0; - correct_payload[2] = PAYLOAD_LEN * 2; - printf("syn flag ends coalescing: "); - check_recv_pkts(rxfd, correct_payload, 3); - - printf("rst flag ends coalescing: "); - check_recv_pkts(rxfd, correct_payload, 3); - - printf("urg flag ends coalescing: "); - check_recv_pkts(rxfd, correct_payload, 3); - } else if (strcmp(testname, "tcp") == 0) { - correct_payload[0] = PAYLOAD_LEN; - 
correct_payload[1] = PAYLOAD_LEN; - correct_payload[2] = PAYLOAD_LEN; - correct_payload[3] = PAYLOAD_LEN; - - printf("changed checksum does not coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - printf("Wrong Seq number doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - printf("Different timestamp doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 4); - - printf("Different options doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 2); - } else if (strcmp(testname, "ip") == 0) { - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - - printf("different ECN doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - printf("different tos doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - if (proto == PF_INET) { - printf("different ttl doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - printf("ip options doesn't coalesce: "); - correct_payload[2] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 3); - - printf("fragmented ip4 doesn't coalesce: "); - check_recv_pkts(rxfd, correct_payload, 2); - - /* is_atomic checks */ - printf("DF=1, Incrementing - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=1, Fixed - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=0, Incrementing - should coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("DF=0, Fixed - should not coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - - printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - 
check_recv_pkts(rxfd, correct_payload, 2); - - printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } else if (proto == PF_INET6) { - /* GRO doesn't check for ipv6 hop limit when flushing. - * Hence no corresponding test to the ipv4 case. - */ - printf("fragmented ip6 doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - correct_payload[1] = PAYLOAD_LEN; - correct_payload[2] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 3); - - printf("ipv6 with ext header does coalesce: "); - correct_payload[0] = PAYLOAD_LEN * 2; - check_recv_pkts(rxfd, correct_payload, 1); - - printf("ipv6 with ext header with different payloads doesn't coalesce: "); - correct_payload[0] = PAYLOAD_LEN; - correct_payload[1] = PAYLOAD_LEN; - check_recv_pkts(rxfd, correct_payload, 2); - } - } else if (strcmp(testname, "large") == 0) { - int offset = proto == PF_INET ? 
20 : 0; - int remainder = (MAX_PAYLOAD + offset) % MSS; - - correct_payload[0] = (MAX_PAYLOAD + offset); - correct_payload[1] = remainder; - printf("Shouldn't coalesce if exceed IP max pkt size: "); - check_recv_pkts(rxfd, correct_payload, 2); - - /* last segment sent individually, doesn't start new segment */ - correct_payload[0] = correct_payload[0] - remainder; - correct_payload[1] = remainder + 1; - correct_payload[2] = remainder + 1; - check_recv_pkts(rxfd, correct_payload, 3); - } else { - error(1, 0, "Test case error, should never trigger"); - } - - if (close(rxfd)) - error(1, 0, "socket close"); -} - -static void parse_args(int argc, char **argv) -{ - static const struct option opts[] = { - { "daddr", required_argument, NULL, 'd' }, - { "dmac", required_argument, NULL, 'D' }, - { "iface", required_argument, NULL, 'i' }, - { "ipv4", no_argument, NULL, '4' }, - { "ipv6", no_argument, NULL, '6' }, - { "rx", no_argument, NULL, 'r' }, - { "saddr", required_argument, NULL, 's' }, - { "smac", required_argument, NULL, 'S' }, - { "test", required_argument, NULL, 't' }, - { "verbose", no_argument, NULL, 'v' }, - { 0, 0, 0, 0 } - }; - int c; - - while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) { - switch (c) { - case '4': - proto = PF_INET; - ethhdr_proto = htons(ETH_P_IP); - break; - case '6': - proto = PF_INET6; - ethhdr_proto = htons(ETH_P_IPV6); - break; - case 'd': - addr4_dst = addr6_dst = optarg; - break; - case 'D': - dmac = optarg; - break; - case 'i': - ifname = optarg; - break; - case 'r': - tx_socket = false; - break; - case 's': - addr4_src = addr6_src = optarg; - break; - case 'S': - smac = optarg; - break; - case 't': - testname = optarg; - break; - case 'v': - verbose = true; - break; - default: - error(1, 0, "%s invalid option %c\n", __func__, c); - break; - } - } -} - -int main(int argc, char **argv) -{ - parse_args(argc, argv); - - if (proto == PF_INET) { - tcp_offset = ETH_HLEN + sizeof(struct iphdr); - total_hdr_len = 
tcp_offset + sizeof(struct tcphdr); - } else if (proto == PF_INET6) { - tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr); - total_hdr_len = MAX_HDR_LEN; - } else { - error(1, 0, "Protocol family is not ipv4 or ipv6"); - } - - read_MAC(src_mac, smac); - read_MAC(dst_mac, dmac); - - if (tx_socket) - gro_sender(); - else - gro_receiver(); - - fprintf(stderr, "Gro::%s test passed.\n", testname); - return 0; -} diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh deleted file mode 100755 index 02c21ff4ca81..000000000000 --- a/tools/testing/selftests/net/gro.sh +++ /dev/null @@ -1,104 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly SERVER_MAC="aa:00:00:00:00:02" -readonly CLIENT_MAC="aa:00:00:00:00:01" -readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large") -readonly PROTOS=("ipv4" "ipv6") -dev="" -test="all" -proto="ipv4" - -run_test() { - local server_pid=0 - local exit_code=0 - local protocol=$1 - local test=$2 - local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \ - "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" ) - - setup_ns - # Each test is run 3 times to deflake, because given the receive timing, - # not all packets that should coalesce will be considered in the same flow - # on every try. - for tries in {1..3}; do - # Actual test starts here - ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \ - 1>>log.txt & - server_pid=$! - sleep 0.5 # to allow for socket init - ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \ - 1>>log.txt - wait "${server_pid}" - exit_code=$? 
- if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \ - ${exit_code} -ne 0 ]]; then - echo "Ignoring errors due to slow environment" 1>&2 - exit_code=0 - fi - if [[ "${exit_code}" -eq 0 ]]; then - break; - fi - done - cleanup_ns - echo ${exit_code} -} - -run_all_tests() { - local failed_tests=() - for proto in "${PROTOS[@]}"; do - for test in "${TESTS[@]}"; do - echo "running test ${proto} ${test}" >&2 - exit_code=$(run_test $proto $test) - if [[ "${exit_code}" -ne 0 ]]; then - failed_tests+=("${proto}_${test}") - fi; - done; - done - if [[ ${#failed_tests[@]} -ne 0 ]]; then - echo "failed tests: ${failed_tests[*]}. \ - Please see log.txt for more logs" - exit 1 - else - echo "All Tests Succeeded!" - fi; -} - -usage() { - echo "Usage: $0 \ - [-i <DEV>] \ - [-t data|ack|flags|tcp|ip|large] \ - [-p <ipv4|ipv6>]" 1>&2; - exit 1; -} - -while getopts "i:t:p:" opt; do - case "${opt}" in - i) - dev="${OPTARG}" - ;; - t) - test="${OPTARG}" - ;; - p) - proto="${OPTARG}" - ;; - *) - usage - ;; - esac -done - -if [ -n "$dev" ]; then - source setup_loopback.sh -else - source setup_veth.sh -fi - -setup -trap cleanup EXIT -if [[ "${test}" == "all" ]]; then - run_all_tests -else - run_test "${proto}" "${test}" -fi; diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile index 884cd2cc0681..4b6afc0fe9f8 100644 --- a/tools/testing/selftests/net/hsr/Makefile +++ b/tools/testing/selftests/net/hsr/Makefile @@ -2,7 +2,11 @@ top_srcdir = ../../../../.. 
-TEST_PROGS := hsr_ping.sh hsr_redbox.sh +TEST_PROGS := \ + hsr_ping.sh \ + hsr_redbox.sh \ +# end of TEST_PROGS + TEST_FILES += hsr_common.sh include ../../lib.mk diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config index 241542441c51..205cc4d3d64b 100644 --- a/tools/testing/selftests/net/hsr/config +++ b/tools/testing/selftests/net/hsr/config @@ -1,5 +1,6 @@ +CONFIG_BRIDGE=y +CONFIG_HSR=y CONFIG_IPV6=y CONFIG_NET_SCH_NETEM=m -CONFIG_HSR=y CONFIG_VETH=y -CONFIG_BRIDGE=y +CONFIG_VLAN_8021Q=m diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh index 8e97b1f2e7e5..1dc882ac1c74 100644 --- a/tools/testing/selftests/net/hsr/hsr_common.sh +++ b/tools/testing/selftests/net/hsr/hsr_common.sh @@ -15,7 +15,7 @@ do_ping() { local netns="$1" local connect_addr="$2" - local ping_args="-q -c 2" + local ping_args="-q -c 2 -i 0.1" if is_v6 "${connect_addr}"; then $ipv6 || return 0 @@ -36,7 +36,7 @@ do_ping_long() { local netns="$1" local connect_addr="$2" - local ping_args="-q -c 10" + local ping_args="-q -c 10 -i 0.1" if is_v6 "${connect_addr}"; then $ipv6 || return 0 diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh index f5d207fc770a..5a65f4f836be 100755 --- a/tools/testing/selftests/net/hsr/hsr_ping.sh +++ b/tools/testing/selftests/net/hsr/hsr_ping.sh @@ -175,6 +175,100 @@ setup_hsr_interfaces() ip -net "$ns3" link set hsr3 up } +setup_vlan_interfaces() { + ip -net "$ns1" link add link hsr1 name hsr1.2 type vlan id 2 + ip -net "$ns1" link add link hsr1 name hsr1.3 type vlan id 3 + ip -net "$ns1" link add link hsr1 name hsr1.4 type vlan id 4 + ip -net "$ns1" link add link hsr1 name hsr1.5 type vlan id 5 + + ip -net "$ns2" link add link hsr2 name hsr2.2 type vlan id 2 + ip -net "$ns2" link add link hsr2 name hsr2.3 type vlan id 3 + ip -net "$ns2" link add link hsr2 name hsr2.4 type vlan id 4 + ip -net "$ns2" link add link hsr2 name 
hsr2.5 type vlan id 5 + + ip -net "$ns3" link add link hsr3 name hsr3.2 type vlan id 2 + ip -net "$ns3" link add link hsr3 name hsr3.3 type vlan id 3 + ip -net "$ns3" link add link hsr3 name hsr3.4 type vlan id 4 + ip -net "$ns3" link add link hsr3 name hsr3.5 type vlan id 5 + + ip -net "$ns1" addr add 100.64.2.1/24 dev hsr1.2 + ip -net "$ns1" addr add 100.64.3.1/24 dev hsr1.3 + ip -net "$ns1" addr add 100.64.4.1/24 dev hsr1.4 + ip -net "$ns1" addr add 100.64.5.1/24 dev hsr1.5 + + ip -net "$ns2" addr add 100.64.2.2/24 dev hsr2.2 + ip -net "$ns2" addr add 100.64.3.2/24 dev hsr2.3 + ip -net "$ns2" addr add 100.64.4.2/24 dev hsr2.4 + ip -net "$ns2" addr add 100.64.5.2/24 dev hsr2.5 + + ip -net "$ns3" addr add 100.64.2.3/24 dev hsr3.2 + ip -net "$ns3" addr add 100.64.3.3/24 dev hsr3.3 + ip -net "$ns3" addr add 100.64.4.3/24 dev hsr3.4 + ip -net "$ns3" addr add 100.64.5.3/24 dev hsr3.5 + + ip -net "$ns1" link set dev hsr1.2 up + ip -net "$ns1" link set dev hsr1.3 up + ip -net "$ns1" link set dev hsr1.4 up + ip -net "$ns1" link set dev hsr1.5 up + + ip -net "$ns2" link set dev hsr2.2 up + ip -net "$ns2" link set dev hsr2.3 up + ip -net "$ns2" link set dev hsr2.4 up + ip -net "$ns2" link set dev hsr2.5 up + + ip -net "$ns3" link set dev hsr3.2 up + ip -net "$ns3" link set dev hsr3.3 up + ip -net "$ns3" link set dev hsr3.4 up + ip -net "$ns3" link set dev hsr3.5 up + +} + +hsr_vlan_ping() { + do_ping "$ns1" 100.64.2.2 + do_ping "$ns1" 100.64.3.2 + do_ping "$ns1" 100.64.4.2 + do_ping "$ns1" 100.64.5.2 + + do_ping "$ns1" 100.64.2.3 + do_ping "$ns1" 100.64.3.3 + do_ping "$ns1" 100.64.4.3 + do_ping "$ns1" 100.64.5.3 + + do_ping "$ns2" 100.64.2.1 + do_ping "$ns2" 100.64.3.1 + do_ping "$ns2" 100.64.4.1 + do_ping "$ns2" 100.64.5.1 + + do_ping "$ns2" 100.64.2.3 + do_ping "$ns2" 100.64.3.3 + do_ping "$ns2" 100.64.4.3 + do_ping "$ns2" 100.64.5.3 + + do_ping "$ns3" 100.64.2.1 + do_ping "$ns3" 100.64.3.1 + do_ping "$ns3" 100.64.4.1 + do_ping "$ns3" 100.64.5.1 + + do_ping "$ns3" 
100.64.2.2 + do_ping "$ns3" 100.64.3.2 + do_ping "$ns3" 100.64.4.2 + do_ping "$ns3" 100.64.5.2 +} + +run_vlan_tests() { + vlan_challenged_hsr1=$(ip net exec "$ns1" ethtool -k hsr1 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr2=$(ip net exec "$ns2" ethtool -k hsr2 | grep "vlan-challenged" | awk '{print $2}') + vlan_challenged_hsr3=$(ip net exec "$ns3" ethtool -k hsr3 | grep "vlan-challenged" | awk '{print $2}') + + if [[ "$vlan_challenged_hsr1" = "off" || "$vlan_challenged_hsr2" = "off" || "$vlan_challenged_hsr3" = "off" ]]; then + echo "INFO: Running VLAN tests" + setup_vlan_interfaces + hsr_vlan_ping + else + echo "INFO: Not Running VLAN tests as the device does not support VLAN" + fi +} + check_prerequisites setup_ns ns1 ns2 ns3 @@ -183,9 +277,13 @@ trap cleanup_all_ns EXIT setup_hsr_interfaces 0 do_complete_ping_test +run_vlan_tests + setup_ns ns1 ns2 ns3 setup_hsr_interfaces 1 do_complete_ping_test +run_vlan_tests + exit $ret diff --git a/tools/testing/selftests/net/hsr/settings b/tools/testing/selftests/net/hsr/settings new file mode 100644 index 000000000000..0fbc037f2aa8 --- /dev/null +++ b/tools/testing/selftests/net/hsr/settings @@ -0,0 +1 @@ +timeout=50 diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh index d6f0e449c029..b13c89a99ecb 100755 --- a/tools/testing/selftests/net/icmp_redirect.sh +++ b/tools/testing/selftests/net/icmp_redirect.sh @@ -178,8 +178,6 @@ setup() else ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1 - ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0 - ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0 ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10 diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c index 
76e604e4810e..7bfeeb133705 100644 --- a/tools/testing/selftests/net/io_uring_zerocopy_tx.c +++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c @@ -106,14 +106,14 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_queue_init(512, &ring, 0); if (ret) - error(1, ret, "io_uring: queue init"); + error(1, -ret, "io_uring: queue init"); iov.iov_base = payload; iov.iov_len = cfg_payload_len; ret = io_uring_register_buffers(&ring, &iov, 1); if (ret) - error(1, ret, "io_uring: buffer registration"); + error(1, -ret, "io_uring: buffer registration"); tstop = gettimeofday_ms() + cfg_runtime_ms; do { @@ -149,24 +149,24 @@ static void do_tx(int domain, int type, int protocol) ret = io_uring_submit(&ring); if (ret != cfg_nr_reqs) - error(1, ret, "submit"); + error(1, -ret, "submit"); if (cfg_cork) do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0); for (i = 0; i < cfg_nr_reqs; i++) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->user_data != NONZC_TAG && cqe->user_data != ZC_TAG) - error(1, -EINVAL, "invalid cqe->user_data"); + error(1, EINVAL, "invalid cqe->user_data"); if (cqe->flags & IORING_CQE_F_NOTIF) { if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (compl_cqes <= 0) - error(1, -EINVAL, "notification mismatch"); + error(1, EINVAL, "notification mismatch"); compl_cqes--; i--; io_uring_cqe_seen(&ring); @@ -174,14 +174,14 @@ static void do_tx(int domain, int type, int protocol) } if (cqe->flags & IORING_CQE_F_MORE) { if (cqe->user_data != ZC_TAG) - error(1, cqe->res, "unexpected F_MORE"); + error(1, -cqe->res, "unexpected F_MORE"); compl_cqes++; } if (cqe->res >= 0) { packets++; bytes += cqe->res; } else if (cqe->res != -EAGAIN) { - error(1, cqe->res, "send failed"); + error(1, -cqe->res, "send failed"); } io_uring_cqe_seen(&ring); } @@ -190,11 +190,11 @@ static void do_tx(int domain, int type, int protocol) 
while (compl_cqes) { ret = io_uring_wait_cqe(&ring, &cqe); if (ret) - error(1, ret, "wait cqe"); + error(1, -ret, "wait cqe"); if (cqe->flags & IORING_CQE_F_MORE) - error(1, -EINVAL, "invalid notif flags"); + error(1, EINVAL, "invalid notif flags"); if (!(cqe->flags & IORING_CQE_F_NOTIF)) - error(1, -EINVAL, "missing notif flag"); + error(1, EINVAL, "missing notif flag"); io_uring_cqe_seen(&ring); compl_cqes--; diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh index 12491850ae98..845c26dd01a9 100755 --- a/tools/testing/selftests/net/ioam6.sh +++ b/tools/testing/selftests/net/ioam6.sh @@ -3,119 +3,106 @@ # # Author: Justin Iurman <justin.iurman@uliege.be> # -# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data -# consistency directly inside packets on the receiver side. Tests are divided -# into three categories: OUTPUT (evaluates the IOAM processing by the sender), -# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates -# wider use cases that do not fall into the other two categories). Both OUTPUT -# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL -# tests use the entire three-node topology (alpha, beta, gamma). Each test is -# documented inside its own handler in the code below. +# This script evaluates IOAM for IPv6 by checking local IOAM configurations and +# IOAM data inside packets. There are three categories of tests: LOCAL, OUTPUT, +# and INPUT. The former (LOCAL) checks all IOAM related configurations locally +# without sending packets. OUTPUT tests verify the processing of an IOAM +# encapsulating node, while INPUT tests verify the processing of an IOAM transit +# node. Both OUTPUT and INPUT tests send packets. Each test is documented inside +# its own handler. # -# An IOAM domain is configured from Alpha to Gamma but not on the reverse path. 
-# When either Beta or Gamma is the destination (depending on the test category), -# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. +# The topology used for OUTPUT and INPUT tests is made of three nodes: +# - Alpha (the IOAM encapsulating node) +# - Beta (the IOAM transit node) +# - Gamma (the receiver) ** # +# An IOAM domain is configured from Alpha to Beta, but not on the reverse path. +# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop. # -# +-------------------+ +-------------------+ -# | | | | -# | Alpha netns | | Gamma netns | -# | | | | -# | +-------------+ | | +-------------+ | -# | | veth0 | | | | veth0 | | -# | | db01::2/64 | | | | db02::2/64 | | -# | +-------------+ | | +-------------+ | -# | . | | . | -# +-------------------+ +-------------------+ -# . . -# . . -# . . -# +----------------------------------------------------+ -# | . . | -# | +-------------+ +-------------+ | -# | | veth0 | | veth1 | | -# | | db01::1/64 | ................ | db02::1/64 | | -# | +-------------+ +-------------+ | -# | | -# | Beta netns | -# | | -# +----------------------------------------------------+ +# ** Gamma is required because ioam6_parser.c uses a packet socket and we need +# to see IOAM data inserted by the very last node (Beta), which would happen +# _after_ we get a copy of the packet on Beta. Note that using an +# IPv6 raw socket with IPV6_RECVHOPOPTS on Beta would not be enough: we also +# need to access the IPv6 header to check some fields (e.g., source and +# destination addresses), which is not possible in that case. As a +# consequence, we need Gamma as a receiver to run ioam6_parser.c which uses a +# packet socket. 
# # +# +-----------------------+ +-----------------------+ +# | | | | +# | Alpha netns | | Gamma netns | +# | | | | +# | +-------------------+ | | +-------------------+ | +# | | veth0 | | | | veth0 | | +# | | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 | | +# | +-------------------+ | | +-------------------+ | +# | . | | . | +# +-----------.-----------+ +-----------.-----------+ +# . . +# . . +# . . +# +-----------.----------------------------------.-----------+ +# | . . | +# | +-------------------+ +-------------------+ | +# | | veth0 | | veth1 | | +# | | 2001:db8:1::1/64 | ............ | 2001:db8:2::1/64 | | +# | +-------------------+ +-------------------+ | +# | | +# | Beta netns | +# | | +# +----------------------------------------------------------+ # -# ============================================================= -# | Alpha - IOAM configuration | -# +===========================================================+ -# | Node ID | 1 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 11111111 | -# +-----------------------------------------------------------+ -# | Ingress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Egress ID | 101 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 101101 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee0 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf00dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 777 | -# +-----------------------------------------------------------+ -# | Schema Data | something that will be 4n-aligned | -# +-----------------------------------------------------------+ # # -# 
============================================================= -# | Beta - IOAM configuration | -# +===========================================================+ -# | Node ID | 2 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 22222222 | -# +-----------------------------------------------------------+ -# | Ingress ID | 201 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 201201 | -# +-----------------------------------------------------------+ -# | Egress ID | 202 | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 202202 | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee1 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf11dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 666 | -# +-----------------------------------------------------------+ -# | Schema Data | Hello there -Obi | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Alpha - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 1 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 11111111 | +# +---------------------+------------------------------------+ +# | Ingress ID | 0xffff (default value) | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Egress ID | 101 | +# +---------------------+------------------------------------+ +# | Egress Wide ID | 101101 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xdeadbeef | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 
0xcafec0caf00dc0de | +# +---------------------+------------------------------------+ +# | Schema ID | 777 | +# +---------------------+------------------------------------+ +# | Schema Data | something that will be 4n-aligned | +# +---------------------+------------------------------------+ # # -# ============================================================= -# | Gamma - IOAM configuration | -# +===========================================================+ -# | Node ID | 3 | -# +-----------------------------------------------------------+ -# | Node Wide ID | 33333333 | -# +-----------------------------------------------------------+ -# | Ingress ID | 301 | -# +-----------------------------------------------------------+ -# | Ingress Wide ID | 301301 | -# +-----------------------------------------------------------+ -# | Egress ID | 0xffff (default value) | -# +-----------------------------------------------------------+ -# | Egress Wide ID | 0xffffffff (default value) | -# +-----------------------------------------------------------+ -# | Namespace Data | 0xdeadbee2 | -# +-----------------------------------------------------------+ -# | Namespace Wide Data | 0xcafec0caf22dc0de | -# +-----------------------------------------------------------+ -# | Schema ID | 0xffffff (= None) | -# +-----------------------------------------------------------+ -# | Schema Data | | -# +-----------------------------------------------------------+ +# +==========================================================+ +# | Beta - IOAM configuration | +# +=====================+====================================+ +# | Node ID | 2 | +# +---------------------+------------------------------------+ +# | Node Wide ID | 22222222 | +# +---------------------+------------------------------------+ +# | Ingress ID | 201 | +# +---------------------+------------------------------------+ +# | Ingress Wide ID | 201201 | +# +---------------------+------------------------------------+ +# | Egress ID | 202 | +# 
+---------------------+------------------------------------+ +# | Egress Wide ID | 202202 | +# +---------------------+------------------------------------+ +# | Namespace Data | 0xffffffff (default value) | +# +---------------------+------------------------------------+ +# | Namespace Wide Data | 0xffffffffffffffff (default value) | +# +---------------------+------------------------------------+ +# | Schema ID | 0xffffff (= None) | +# +---------------------+------------------------------------+ +# | Schema Data | | +# +---------------------+------------------------------------+ source lib.sh @@ -128,64 +115,69 @@ source lib.sh ################################################################################ ALPHA=( - 1 # ID - 11111111 # Wide ID - 0xffff # Ingress ID - 0xffffffff # Ingress Wide ID - 101 # Egress ID - 101101 # Egress Wide ID - 0xdeadbee0 # Namespace Data - 0xcafec0caf00dc0de # Namespace Wide Data - 777 # Schema ID (0xffffff = None) - "something that will be 4n-aligned" # Schema Data + 1 # ID + 11111111 # Wide ID + 0xffff # Ingress ID (default value) + 0xffffffff # Ingress Wide ID (default value) + 101 # Egress ID + 101101 # Egress Wide ID + 0xdeadbeef # Namespace Data + 0xcafec0caf00dc0de # Namespace Wide Data + 777 # Schema ID + "something that will be 4n-aligned" # Schema Data ) BETA=( - 2 - 22222222 - 201 - 201201 - 202 - 202202 - 0xdeadbee1 - 0xcafec0caf11dc0de - 666 - "Hello there -Obi" + 2 # ID + 22222222 # Wide ID + 201 # Ingress ID + 201201 # Ingress Wide ID + 202 # Egress ID + 202202 # Egress Wide ID + 0xffffffff # Namespace Data (empty value) + 0xffffffffffffffff # Namespace Wide Data (empty value) + 0xffffff # Schema ID (empty value) + "" # Schema Data (empty value) ) -GAMMA=( - 3 - 33333333 - 301 - 301301 - 0xffff - 0xffffffff - 0xdeadbee2 - 0xcafec0caf22dc0de - 0xffffff - "" -) +TESTS_LOCAL=" + local_sysctl_ioam_id + local_sysctl_ioam_id_wide + local_sysctl_ioam_intf_id + local_sysctl_ioam_intf_id_wide + local_sysctl_ioam_intf_enabled + 
local_ioam_namespace + local_ioam_schema + local_ioam_schema_namespace + local_route_ns + local_route_tunsrc + local_route_tundst + local_route_trace_type + local_route_trace_size + local_route_trace_type_bits + local_route_trace_size_values +" TESTS_OUTPUT=" - out_undef_ns - out_no_room - out_bits - out_full_supp_trace + output_undef_ns + output_no_room + output_no_room_oss + output_bits + output_sizes + output_full_supp_trace " TESTS_INPUT=" - in_undef_ns - in_no_room - in_oflag - in_bits - in_full_supp_trace + input_undef_ns + input_no_room + input_no_room_oss + input_disabled + input_oflag + input_bits + input_sizes + input_full_supp_trace " -TESTS_GLOBAL=" - fwd_full_supp_trace -" - - ################################################################################ # # # LIBRARY # @@ -194,66 +186,64 @@ TESTS_GLOBAL=" check_kernel_compatibility() { - setup_ns ioam_tmp_node - ip link add name veth0 netns $ioam_tmp_node type veth \ - peer name veth1 netns $ioam_tmp_node + setup_ns ioam_tmp_node &>/dev/null + local ret=$? - ip -netns $ioam_tmp_node link set veth0 up - ip -netns $ioam_tmp_node link set veth1 up + ip link add name veth0 netns $ioam_tmp_node type veth \ + peer name veth1 netns $ioam_tmp_node &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace add 0 - ns_ad=$? + ip -netns $ioam_tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) - ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0" - ns_sh=$? + ip -netns $ioam_tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) - if [[ $ns_ad != 0 || $ns_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: kernel version probably too old, missing ioam support" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Setup failed." + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 - tr_ad=$? 
+ ip -netns $ioam_tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 dev veth0 &>/dev/null + ret=$? - ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6" - tr_sh=$? + ip -netns $ioam_tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + ret=$((ret + $?)) - if [[ $tr_ad != 0 || $tr_sh != 0 ]] + if [ $ret != 0 ] then - echo "SKIP: cannot attach an ioam trace to a route, did you compile" \ - "without CONFIG_IPV6_IOAM6_LWTUNNEL?" - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + echo "SKIP: Cannot attach an IOAM trace to a route. Was your kernel" \ + "compiled without CONFIG_IPV6_IOAM6_LWTUNNEL? Are you running an" \ + "old kernel? Are you using an old version of iproute2?" + cleanup_ns $ioam_tmp_node exit $ksft_skip fi - ip link del veth0 2>/dev/null || true - cleanup_ns $ioam_tmp_node || true + cleanup_ns $ioam_tmp_node - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" ip6tnl_loaded=$? - if [ $ip6tnl_loaded = 0 ] + if [ $ip6tnl_loaded == 0 ] then encap_tests=0 else modprobe ip6_tunnel &>/dev/null - lsmod | grep -q "ip6_tunnel" + lsmod 2>/dev/null | grep -q "ip6_tunnel" encap_tests=$? if [ $encap_tests != 0 ] then - ip a | grep -q "ip6tnl0" + ip a 2>/dev/null | grep -q "ip6tnl0" encap_tests=$? if [ $encap_tests != 0 ] then echo "Note: ip6_tunnel not found neither as a module nor inside the" \ - "kernel, tests that require it (encap mode) will be omitted" + "kernel. Any tests that require it will be skipped." 
fi fi fi @@ -261,477 +251,1400 @@ check_kernel_compatibility() cleanup() { - ip link del ioam-veth-alpha 2>/dev/null || true - ip link del ioam-veth-gamma 2>/dev/null || true - - cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true + cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma if [ $ip6tnl_loaded != 0 ] then - modprobe -r ip6_tunnel 2>/dev/null || true + modprobe -r ip6_tunnel &>/dev/null fi } setup() { - setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma + setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma &>/dev/null ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \ - peer name ioam-veth-betaL netns $ioam_node_beta + peer name ioam-veth-betaL netns $ioam_node_beta &>/dev/null ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \ - peer name ioam-veth-gamma netns $ioam_node_gamma - - ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 - ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 - ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 - - ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0 - ip -netns $ioam_node_alpha link set veth0 up - ip -netns $ioam_node_alpha link set lo up - ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0 - ip -netns $ioam_node_alpha route del db01::/64 - ip -netns $ioam_node_alpha route add db01::/64 dev veth0 - - ip -netns $ioam_node_beta addr add db01::1/64 dev veth0 - ip -netns $ioam_node_beta addr add db02::1/64 dev veth1 - ip -netns $ioam_node_beta link set veth0 up - ip -netns $ioam_node_beta link set veth1 up - ip -netns $ioam_node_beta link set lo up - - ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0 - ip -netns $ioam_node_gamma link set veth0 up - ip -netns $ioam_node_gamma link set lo up - ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0 - - # - IOAM config - - ip netns exec $ioam_node_alpha sysctl -wq 
net.ipv6.ioam6_id=${ALPHA[0]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} - ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} - ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}" - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} - ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]} - ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}" - ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]} - - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]} - ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]} - ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]} + peer name ioam-veth-gamma netns $ioam_node_gamma &>/dev/null + + ip -netns $ioam_node_alpha link set ioam-veth-alpha name 
veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0 &>/dev/null + ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1 &>/dev/null + ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0 &>/dev/null + + ip -netns $ioam_node_alpha addr add 2001:db8:1::50/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_alpha link set veth0 up &>/dev/null + ip -netns $ioam_node_alpha link set lo up &>/dev/null + ip -netns $ioam_node_alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $ioam_node_beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $ioam_node_beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $ioam_node_beta link set veth0 up &>/dev/null + ip -netns $ioam_node_beta link set veth1 up &>/dev/null + ip -netns $ioam_node_beta link set lo up &>/dev/null + + ip -netns $ioam_node_gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $ioam_node_gamma link set veth0 up &>/dev/null + ip -netns $ioam_node_gamma link set lo up &>/dev/null + ip -netns $ioam_node_gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + # - Alpha: IOAM config - + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]} &>/dev/null + ip netns exec $ioam_node_alpha \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam schema add ${ALPHA[8]} "${ALPHA[9]}" &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null + + # - Beta: IOAM config - + ip netns exec $ioam_node_beta \ + 
sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id=${BETA[0]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]} &>/dev/null + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]} &>/dev/null + ip -netns $ioam_node_beta ioam namespace add 123 &>/dev/null sleep 1 - ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null + ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - echo "Setup FAILED" - cleanup &>/dev/null - exit 0 + echo "SKIP: Setup failed." 
+ cleanup + exit $ksft_skip fi } log_test_passed() { - local desc=$1 - printf "TEST: %-60s [ OK ]\n" "${desc}" + printf " - TEST: %-57s [ OK ]\n" "$1" + npassed=$((npassed+1)) } -log_test_failed() +log_test_skipped() { - local desc=$1 - printf "TEST: %-60s [FAIL]\n" "${desc}" + printf " - TEST: %-57s [SKIP]\n" "$1" + nskipped=$((nskipped+1)) } -log_results() +log_test_failed() { - echo "- Tests passed: ${npassed}" - echo "- Tests failed: ${nfailed}" + printf " - TEST: %-57s [FAIL]\n" "$1" + nfailed=$((nfailed+1)) } run_test() { local name=$1 local desc=$2 - local node_src=$3 - local node_dst=$4 - local ip6_dst=$5 - local trace_type=$6 - local ioam_ns=$7 - local type=$8 - - ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type & + local ip6_src=$3 + local trace_type=$4 + local trace_size=$5 + local ioam_ns=$6 + local type=$7 + + ip netns exec $ioam_node_gamma \ + ./ioam6_parser veth0 $name $ip6_src 2001:db8:2::2 \ + $trace_type $trace_size $ioam_ns $type & local spid=$! sleep 0.1 - ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null + ip netns exec $ioam_node_alpha ping6 -t 64 -c 1 -W 1 2001:db8:2::2 &>/dev/null if [ $? != 0 ] then - nfailed=$((nfailed+1)) log_test_failed "${desc}" kill -2 $spid &>/dev/null else wait $spid - if [ $? = 0 ] - then - npassed=$((npassed+1)) - log_test_passed "${desc}" - else - nfailed=$((nfailed+1)) - log_test_failed "${desc}" - fi + [ $? 
== 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" fi } run() { + local test + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s LOCAL tests %-29s |" echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "OUTPUT tests" - printf "%0.s-" {1..74} + + echo + echo "Global config" + for test in $TESTS_LOCAL + do + $test + done + + echo + echo "Inline mode" + for test in $TESTS_LOCAL + do + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_LOCAL + do + $test "encap" + done + + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" + echo + printf "| %-28s OUTPUT tests %-28s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null - for t in $TESTS_OUTPUT + echo + echo "Inline mode" + for test in $TESTS_OUTPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" done - # clean OUTPUT settings - ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + echo + echo "Encap mode" + for test in $TESTS_OUTPUT + do + $test "encap" + done + echo + echo "Encap mode (with tunsrc)" + for test in $TESTS_OUTPUT + do + $test "encap" "tunsrc" + done + + # clean OUTPUT settings + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "INPUT tests" - printf "%0.s-" {1..74} + printf "| %-28s INPUT tests %-29s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo # set INPUT settings - ip -netns $ioam_node_alpha ioam namespace del 123 + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null - for t in $TESTS_INPUT + echo + echo "Inline 
mode" + for test in $TESTS_INPUT do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" + $test "inline" + done + + echo + echo "Encap mode" + for test in $TESTS_INPUT + do + $test "encap" done # clean INPUT settings - ip -netns $ioam_node_alpha ioam namespace add 123 \ - data ${ALPHA[6]} wide ${ALPHA[7]} - ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]} - ip -netns $ioam_node_alpha route change db01::/64 dev veth0 + ip -netns $ioam_node_alpha \ + ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]} &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace set 123 schema ${ALPHA[8]} &>/dev/null echo - printf "%0.s-" {1..74} + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - echo "GLOBAL tests" - printf "%0.s-" {1..74} + printf "| %-30s Results %-31s |" + echo + printf "+" + printf "%0.s-" {1..72} + printf "+" echo - for t in $TESTS_GLOBAL - do - $t "inline" - [ $encap_tests = 0 ] && $t "encap" - done - echo - log_results + echo "- Passed: ${npassed}" + echo "- Skipped: ${nskipped}" + echo "- Failed: ${nfailed}" + echo } bit2type=( 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100 - 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 + 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002 0x000001 ) -bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 ) +bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 0 ) ################################################################################ # # -# OUTPUT tests # +# LOCAL tests # # # -# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. # ################################################################################ -out_undef_ns() +local_sysctl_ioam_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.ioam6_id" works as expected. 
# + ############################################################################## + local desc="Sysctl net.ipv6.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id 2>/dev/null | grep -wq ${ALPHA[0]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_sysctl_ioam_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace if the chosen IOAM # - # namespace is not configured locally. # + # Make sure the sysctl "net.ipv6.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Sysctl net.ipv6.ioam6_id_wide" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ ! -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[1]} - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down +local_sysctl_ioam_intf_id() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_id" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id 2>/dev/null | grep -wq ${ALPHA[4]} + + [ $? 
== 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_no_room() +local_sysctl_ioam_intf_id_wide() { ############################################################################## - # Make sure that the encap node won't fill the trace and will set the # - # Overflow flag since there is no room enough for its data. # + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_id_wide" works as expected. # ############################################################################## - local desc="Missing trace room" + local desc="Sysctl net.ipv6.conf.XX.ioam6_id_wide" + + [ ! -z $1 ] && return + + ip netns exec $ioam_node_alpha \ + sysctl net.ipv6.conf.veth0.ioam6_id_wide 2>/dev/null | grep -wq ${ALPHA[5]} + + [ $? == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up +local_sysctl_ioam_intf_enabled() +{ + ############################################################################## + # Make sure the sysctl "net.ipv6.conf.XX.ioam6_enabled" works as expected. # + ############################################################################## + local desc="Sysctl net.ipv6.conf.XX.ioam6_enabled" - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + [ ! -z $1 ] && return - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip netns exec $ioam_node_beta \ + sysctl net.ipv6.conf.veth0.ioam6_enabled 2>/dev/null | grep -wq 1 - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ $? 
== 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" } -out_bits() +local_ioam_namespace() { ############################################################################## - # Make sure that, for each trace type bit, the encap node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure the creation of an IOAM Namespace works as expected. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Create an IOAM Namespace" - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + [ ! -z $1 ] && return - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq 123 + local ret=$? - for i in {0..22} - do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 &>/dev/null + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[6]} + ret=$((ret + $?)) - local cmd_res=$? - local descr="${desc/<n>/$i}" + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[7]} + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema() +{ + ############################################################################## + # Make sure the creation of an IOAM Schema works as expected. # + ############################################################################## + local desc="Create an IOAM Schema" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? 
+ + local sc_data=$( + for i in `seq 0 $((${#ALPHA[9]}-1))` + do + chr=${ALPHA[9]:i:1} + printf "%x " "'${chr}" + done + ) + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -q "$sc_data" + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_ioam_schema_namespace() +{ + ############################################################################## + # Make sure the binding of a Schema to a Namespace works as expected. # + ############################################################################## + local desc="Bind an IOAM Schema to an IOAM Namespace" + + [ ! -z $1 ] && return + + ip -netns $ioam_node_alpha \ + ioam namespace show 2>/dev/null | grep -wq ${ALPHA[8]} + local ret=$? + + ip -netns $ioam_node_alpha \ + ioam schema show 2>/dev/null | grep -wq 123 + ret=$((ret + $?)) + + [ $ret == 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}" +} + +local_route_ns() +{ + ############################################################################## + # Make sure the Namespace-ID is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Namespace-ID" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? 
+ + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tunsrc() +{ + ############################################################################## + # Make sure the Tunnel Source is only (and possibly) used with encap mode. # + ############################################################################## + local desc + local mode + local mode_tunsrc - if [[ $i -ge 12 && $i -le 21 ]] + [ -z $1 ] && return + + if [ "$1" == "encap" ] + then + desc="Optional Tunnel Source" + mode="$1 tundst 2001:db8:2::2" + mode_tunsrc="$1 tunsrc 2001:db8:1::50 tundst 2001:db8:2::2" + else + desc="Unneeded Tunnel Source" + mode="$1" + mode_tunsrc="$1 tunsrc 2001:db8:1::50" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tunsrc trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? 
+ + if [ "$1" == "encap" ] + then + [[ $ret1 != 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_tundst() +{ + ############################################################################## + # Make sure the Tunnel Destination is only (and always) used with encap mode.# + ############################################################################## + local desc + + [ -z $1 ] && return + + [ "$1" == "encap" ] && desc="Mandatory Tunnel Destination" \ + || desc="Unneeded Tunnel Destination" + + local mode="$1" + local mode_tundst="$1 tundst 2001:db8:2::2" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode_tundst trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + if [ "$1" == "encap" ] + then + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + else + [[ $ret1 != 0 || $ret2 == 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type() +{ + ############################################################################## + # Make sure the Trace Type is always provided, whatever the mode. 
# + ############################################################################## + local desc="Mandatory Trace Type" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_size() +{ + ############################################################################## + # Make sure the Trace Size is always provided, whatever the mode. # + ############################################################################## + local desc="Mandatory Trace Size" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret1=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + local ret2=$? + + [[ $ret1 == 0 || $ret2 != 0 ]] && log_test_failed "${desc}" \ + || log_test_passed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null +} + +local_route_trace_type_bits() +{ + ############################################################################## + # Make sure only allowed bits (0-11 and 22) are accepted. 
# + ############################################################################## + local desc="Trace Type bits" + local mode + + [ -z $1 ] && return + + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" + + local i + for i in {0..23} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type ${bit2type[$i]} ns 0 size 4 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [[ ($? == 0 && (($i -ge 12 && $i -le 21) || $i == 23)) || + ($? != 0 && (($i -ge 0 && $i -le 11) || $i == 22)) ]] then - if [ $cmd_res != 0 ] - then - npassed=$((npassed+1)) - log_test_passed "$descr ($1 mode)" - else - nfailed=$((nfailed+1)) - log_test_failed "$descr ($1 mode)" - fi - else - run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + local err=1 + break fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" - bit2size[22]=$tmp + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } -out_full_supp_trace() +local_route_trace_size_values() { ############################################################################## - # Make sure that the encap node will correctly fill a full trace. Be careful,# - # "full trace" here does NOT mean all bits (only supported ones). # + # Make sure only allowed sizes (multiples of four in [4,244]) are accepted. 
# ############################################################################## - local desc="Full supported trace" + local desc="Trace Size values" + local mode - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + [ -z $1 ] && return - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 100 dev veth0 + [ "$1" == "encap" ] && mode="$1 tundst 2001:db8:2::2" || mode="$1" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + # we also try the next multiple of four after the MAX to check it's refused + local i + for i in {0..248} + do + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type 0x800000 ns 0 size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [[ ($? == 0 && ($i == 0 || $i == 248 || $(( $i % 4 )) != 0)) || + ($? != 0 && $i != 0 && $i != 248 && $(( $i % 4 )) == 0) ]] + then + local err=1 + break + fi + done + + [ -z $err ] && log_test_passed "${desc}" || log_test_failed "${desc}" + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null } ################################################################################ # # -# INPUT tests # +# OUTPUT tests # # # -# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon # -# insertion -> the IOAM namespace configured on the sender is removed # -# and is used in the inserted trace to force the sender not to fill it. 
# ################################################################################ -in_undef_ns() +output_undef_ns() { ############################################################################## - # Make sure that the receiving node won't fill the trace if the related IOAM # - # namespace is not configured locally. # + # Make sure an IOAM encapsulating node does NOT fill the trace when the # + # corresponding IOAM Namespace-ID is not configured locally. # ############################################################################## - local desc="Unknown IOAM namespace" + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0x800000 ns 0 size 4 dev veth0 + if [ $? 
== 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0x800000 0 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_no_room() +output_no_room() { ############################################################################## - # Make sure that the receiving node won't fill the trace and will set the # - # Overflow flag if there is no room enough for its data. # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for its data. # ############################################################################## - local desc="Missing trace room" + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 
&>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_bits() +output_no_room_oss() { ############################################################################## - # Make sure that, for each trace type bit, the receiving node will either: # - # (i) fill the trace with its data when it is a supported bit # - # (ii) not fill the trace with its data when it is an unsupported bit # + # Make sure an IOAM encapsulating node does NOT fill the trace AND sets the # + # Overflow flag when there is not enough room for the Opaque State Snapshot. # ############################################################################## - local desc="Trace type with bit <n> only" + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + local saddr="2001:db8:1::2" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - local tmp=${bit2size[22]} - bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? 
== 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +output_bits() +{ + ############################################################################## + # Make sure an IOAM encapsulating node implements all supported bits by # + # checking it correctly fills the trace with its data. # + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + local i for i in {0..11} {22..22} do - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \ - dev veth0 + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \ - $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? 
== 0 ] + then + run_test "output_bit$i" "${descr}" $saddr \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi done - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null bit2size[22]=$tmp } -in_oflag() +output_sizes() { ############################################################################## - # Make sure that the receiving node won't fill the trace since the Overflow # - # flag is set. # + # Make sure an IOAM encapsulating node allocates supported sizes correctly. # ############################################################################## - local desc="Overflow flag is set" + local desc="Trace Size of <n> bytes" + local ns=0 + local tr_type=0x800000 + local mode="$1" + local saddr="2001:db8:1::2" - # Exception: - # Here, we need the sender to set the Overflow flag. For that, we will add - # back the IOAM namespace that was previously configured on the sender. 
- ip -netns $ioam_node_alpha ioam namespace add 123 + if [ "$1" == "encap" ] + then + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xc00000 ns 123 size 4 dev veth0 + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xc00000 123 $1 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - # And we clean the exception for this test to get things back to normal for - # other INPUT tests - ip -netns $ioam_node_alpha ioam namespace del 123 + if [ $? == 0 ] + then + run_test "output_size$i" "${descr}" $saddr $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } -in_full_supp_trace() +output_full_supp_trace() { ############################################################################## - # Make sure that the receiving node will correctly fill a full trace. Be # - # careful, "full trace" here does NOT mean all bits (only supported ones). # + # Make sure an IOAM encapsulating node correctly fills a trace when all # + # supported bits are set. 
# ############################################################################## local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + local saddr="2001:db8:1::2" - [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi - ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 80 dev veth0 + if [ "$2" == "tunsrc" ] + then + saddr="2001:db8:1::50" + mode+=" tunsrc 2001:db8:1::50" + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \ - db01::1 0xfff002 123 $1 + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down + local i + tr_size=$(( ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" $saddr $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } ################################################################################ # # -# GLOBAL tests # +# INPUT tests # # # -# Three nodes (sender/router/receiver), IOAM fully enabled on every node. 
# ################################################################################ -fwd_full_supp_trace() +input_undef_ns() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the corresponding IOAM # + # Namespace-ID is not configured locally. # + ############################################################################## + local desc="Unknown IOAM Namespace-ID" + local ns=0 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for its data. 
# + ############################################################################## + local desc="Missing room for data" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_no_room_oss() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace AND sets the Overflow flag # + # when there is not enough room for the Opaque State Snapshot. # + ############################################################################## + local desc="Missing room for Opaque State Snapshot" + local ns=123 + local tr_type=0x000002 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? 
== 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_disabled() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when IOAM is not enabled on # + # the corresponding (ingress) interface. # + ############################################################################## + local desc="IOAM disabled on ingress interface" + local ns=123 + local tr_type=0x800000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: disable IOAM on ingress interface + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip netns exec $ioam_node_beta \ + sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_oflag() +{ + ############################################################################## + # Make sure an IOAM node does NOT fill the trace when the Overflow flag is # + # set. 
# + ############################################################################## + local desc="Overflow flag is set" + local ns=123 + local tr_type=0xc00000 + local tr_size=4 + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + # Exception: + # Here, we need the sender to set the Overflow flag. For that, we will add + # back the IOAM namespace that was previously configured on the sender. + ip -netns $ioam_node_alpha ioam namespace add 123 &>/dev/null + local ret=$? + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + ret=$((ret + $?)) + + if [ $ret == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + # Clean Exception + ip -netns $ioam_node_alpha ioam namespace del 123 &>/dev/null + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_bits() +{ + ############################################################################## + # Make sure an IOAM node implements all supported bits by checking it # + # correctly fills the trace with its data. 
# + ############################################################################## + local desc="Trace Type with supported bit <n> only" + local ns=123 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local tmp=${bit2size[22]} + bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + + local i + for i in {0..11} {22..22} + do + local descr="${desc/<n>/$i}" + + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc \ + type ${bit2type[$i]} ns $ns size ${bit2size[$i]} \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test "input_bit$i" "${descr}" 2001:db8:1::2 \ + ${bit2type[$i]} ${bit2size[$i]} $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null + + bit2size[22]=$tmp +} + +input_sizes() { ############################################################################## - # Make sure that all three nodes correctly filled the full supported trace # - # by checking that the trace data is consistent with the predefined config. # + # Make sure an IOAM node handles all supported sizes correctly. 
# ############################################################################## - local desc="Forward - Full supported trace" + local desc="Trace Size of <n> bytes" + local ns=123 + local tr_type=0x800000 + local mode="$1" + + if [ "$1" == "encap" ] + then + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi - [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1" - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up + local i + for i in $(seq 4 4 244) + do + local descr="${desc/<n>/$i}" - ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \ - trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0 + if [[ "$1" == "encap" && $encap_tests != 0 ]] + then + log_test_skipped "${descr}" + continue + fi - run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \ - db02::2 0xfff002 123 $1 + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $i \ + via 2001:db8:1::1 dev veth0 &>/dev/null - [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down + if [ $? == 0 ] + then + run_test "input_size$i" "${descr}" 2001:db8:1::2 $tr_type $i $ns $1 + else + log_test_failed "${descr}" + fi + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null +} + +input_full_supp_trace() +{ + ############################################################################## + # Make sure an IOAM node correctly fills a trace when all supported bits are # + # set. 
# + ############################################################################## + local desc="Full supported trace" + local ns=123 + local tr_type=0xfff002 + local tr_size + local mode="$1" + + if [ "$1" == "encap" ] + then + if [ $encap_tests != 0 ] + then + log_test_skipped "${desc}" + return + fi + + mode+=" tundst 2001:db8:2::2" + ip -netns $ioam_node_gamma link set ip6tnl0 up &>/dev/null + fi + + local i + tr_size=$(( ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) )) + for i in {0..11} {22..22} + do + tr_size=$((tr_size + bit2size[$i])) + done + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 \ + encap ioam6 mode $mode trace prealloc type $tr_type ns $ns size $tr_size \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + if [ $? == 0 ] + then + run_test ${FUNCNAME[0]} "${desc}" 2001:db8:1::2 $tr_type $tr_size $ns $1 + else + log_test_failed "${desc}" + fi + + ip -netns $ioam_node_alpha \ + route change 2001:db8:2::/64 via 2001:db8:1::1 dev veth0 &>/dev/null + + [ "$1" == "encap" ] && ip -netns $ioam_node_gamma \ + link set ip6tnl0 down &>/dev/null } @@ -742,30 +1655,29 @@ fwd_full_supp_trace() ################################################################################ npassed=0 +nskipped=0 nfailed=0 if [ "$(id -u)" -ne 0 ] then - echo "SKIP: Need root privileges" + echo "SKIP: Need root privileges." exit $ksft_skip fi if [ ! -x "$(command -v ip)" ] then - echo "SKIP: Could not run test without ip tool" - exit $ksft_skip -fi - -ip ioam &>/dev/null -if [ $? = 1 ] -then - echo "SKIP: iproute2 too old, missing ioam command" + echo "SKIP: Could not run test without ip tool." 
exit $ksft_skip fi check_kernel_compatibility - -cleanup &>/dev/null setup run -cleanup &>/dev/null +cleanup + +if [ $nfailed != 0 ] +then + exit $ksft_fail +fi + +exit $ksft_pass diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c index 895e5bb5044b..de4b5c9e8a74 100644 --- a/tools/testing/selftests/net/ioam6_parser.c +++ b/tools/testing/selftests/net/ioam6_parser.c @@ -8,8 +8,10 @@ #include <errno.h> #include <limits.h> #include <linux/const.h> +#include <linux/if_ether.h> #include <linux/ioam6.h> #include <linux/ipv6.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -40,7 +42,7 @@ static struct ioam_config node1 = { .egr_id = 101, .ingr_wide = 0xffffffff, /* default value */ .egr_wide = 101101, - .ns_data = 0xdeadbee0, + .ns_data = 0xdeadbeef, .ns_wide = 0xcafec0caf00dc0de, .sc_id = 777, .sc_data = "something that will be 4n-aligned", @@ -54,33 +56,22 @@ static struct ioam_config node2 = { .egr_id = 202, .ingr_wide = 201201, .egr_wide = 202202, - .ns_data = 0xdeadbee1, - .ns_wide = 0xcafec0caf11dc0de, - .sc_id = 666, - .sc_data = "Hello there -Obi", - .hlim = 63, -}; - -static struct ioam_config node3 = { - .id = 3, - .wide = 33333333, - .ingr_id = 301, - .egr_id = 0xffff, /* default value */ - .ingr_wide = 301301, - .egr_wide = 0xffffffff, /* default value */ - .ns_data = 0xdeadbee2, - .ns_wide = 0xcafec0caf22dc0de, + .ns_data = 0xffffffff, /* default value */ + .ns_wide = 0xffffffffffffffff, /* default value */ .sc_id = 0xffffff, /* default value */ .sc_data = NULL, - .hlim = 62, + .hlim = 63, }; enum { /********** * OUTPUT * **********/ + __TEST_OUT_MIN, + TEST_OUT_UNDEF_NS, TEST_OUT_NO_ROOM, + TEST_OUT_NO_ROOM_OSS, TEST_OUT_BIT0, TEST_OUT_BIT1, TEST_OUT_BIT2, @@ -94,13 +85,80 @@ enum { TEST_OUT_BIT10, TEST_OUT_BIT11, TEST_OUT_BIT22, + TEST_OUT_SIZE4, + TEST_OUT_SIZE8, + TEST_OUT_SIZE12, + TEST_OUT_SIZE16, + TEST_OUT_SIZE20, + TEST_OUT_SIZE24, + TEST_OUT_SIZE28, + 
TEST_OUT_SIZE32, + TEST_OUT_SIZE36, + TEST_OUT_SIZE40, + TEST_OUT_SIZE44, + TEST_OUT_SIZE48, + TEST_OUT_SIZE52, + TEST_OUT_SIZE56, + TEST_OUT_SIZE60, + TEST_OUT_SIZE64, + TEST_OUT_SIZE68, + TEST_OUT_SIZE72, + TEST_OUT_SIZE76, + TEST_OUT_SIZE80, + TEST_OUT_SIZE84, + TEST_OUT_SIZE88, + TEST_OUT_SIZE92, + TEST_OUT_SIZE96, + TEST_OUT_SIZE100, + TEST_OUT_SIZE104, + TEST_OUT_SIZE108, + TEST_OUT_SIZE112, + TEST_OUT_SIZE116, + TEST_OUT_SIZE120, + TEST_OUT_SIZE124, + TEST_OUT_SIZE128, + TEST_OUT_SIZE132, + TEST_OUT_SIZE136, + TEST_OUT_SIZE140, + TEST_OUT_SIZE144, + TEST_OUT_SIZE148, + TEST_OUT_SIZE152, + TEST_OUT_SIZE156, + TEST_OUT_SIZE160, + TEST_OUT_SIZE164, + TEST_OUT_SIZE168, + TEST_OUT_SIZE172, + TEST_OUT_SIZE176, + TEST_OUT_SIZE180, + TEST_OUT_SIZE184, + TEST_OUT_SIZE188, + TEST_OUT_SIZE192, + TEST_OUT_SIZE196, + TEST_OUT_SIZE200, + TEST_OUT_SIZE204, + TEST_OUT_SIZE208, + TEST_OUT_SIZE212, + TEST_OUT_SIZE216, + TEST_OUT_SIZE220, + TEST_OUT_SIZE224, + TEST_OUT_SIZE228, + TEST_OUT_SIZE232, + TEST_OUT_SIZE236, + TEST_OUT_SIZE240, + TEST_OUT_SIZE244, TEST_OUT_FULL_SUPP_TRACE, + __TEST_OUT_MAX, + /********* * INPUT * *********/ + __TEST_IN_MIN, + TEST_IN_UNDEF_NS, TEST_IN_NO_ROOM, + TEST_IN_NO_ROOM_OSS, + TEST_IN_DISABLED, TEST_IN_OFLAG, TEST_IN_BIT0, TEST_IN_BIT1, @@ -115,36 +173,107 @@ enum { TEST_IN_BIT10, TEST_IN_BIT11, TEST_IN_BIT22, + TEST_IN_SIZE4, + TEST_IN_SIZE8, + TEST_IN_SIZE12, + TEST_IN_SIZE16, + TEST_IN_SIZE20, + TEST_IN_SIZE24, + TEST_IN_SIZE28, + TEST_IN_SIZE32, + TEST_IN_SIZE36, + TEST_IN_SIZE40, + TEST_IN_SIZE44, + TEST_IN_SIZE48, + TEST_IN_SIZE52, + TEST_IN_SIZE56, + TEST_IN_SIZE60, + TEST_IN_SIZE64, + TEST_IN_SIZE68, + TEST_IN_SIZE72, + TEST_IN_SIZE76, + TEST_IN_SIZE80, + TEST_IN_SIZE84, + TEST_IN_SIZE88, + TEST_IN_SIZE92, + TEST_IN_SIZE96, + TEST_IN_SIZE100, + TEST_IN_SIZE104, + TEST_IN_SIZE108, + TEST_IN_SIZE112, + TEST_IN_SIZE116, + TEST_IN_SIZE120, + TEST_IN_SIZE124, + TEST_IN_SIZE128, + TEST_IN_SIZE132, + TEST_IN_SIZE136, + TEST_IN_SIZE140, + 
TEST_IN_SIZE144, + TEST_IN_SIZE148, + TEST_IN_SIZE152, + TEST_IN_SIZE156, + TEST_IN_SIZE160, + TEST_IN_SIZE164, + TEST_IN_SIZE168, + TEST_IN_SIZE172, + TEST_IN_SIZE176, + TEST_IN_SIZE180, + TEST_IN_SIZE184, + TEST_IN_SIZE188, + TEST_IN_SIZE192, + TEST_IN_SIZE196, + TEST_IN_SIZE200, + TEST_IN_SIZE204, + TEST_IN_SIZE208, + TEST_IN_SIZE212, + TEST_IN_SIZE216, + TEST_IN_SIZE220, + TEST_IN_SIZE224, + TEST_IN_SIZE228, + TEST_IN_SIZE232, + TEST_IN_SIZE236, + TEST_IN_SIZE240, + TEST_IN_SIZE244, TEST_IN_FULL_SUPP_TRACE, - /********** - * GLOBAL * - **********/ - TEST_FWD_FULL_SUPP_TRACE, + __TEST_IN_MAX, __TEST_MAX, }; -static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_header(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns || - __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8)) + if (__be16_to_cpu(trace->namespace_id) != ioam_ns || + __be32_to_cpu(trace->type_be32) != (trace_type << 8)) return 1; switch (tid) { case TEST_OUT_UNDEF_NS: case TEST_IN_UNDEF_NS: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen != 1; + case TEST_IN_DISABLED: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 1; case TEST_OUT_NO_ROOM: case TEST_IN_NO_ROOM: case TEST_IN_OFLAG: - return !ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen != 1; + return trace->overflow == 0 || + trace->nodelen != 2 || + trace->remlen != 1; + + case TEST_OUT_NO_ROOM_OSS: + return trace->overflow == 0 || + trace->nodelen != 0 || + trace->remlen != 1; + + case TEST_IN_NO_ROOM_OSS: + case TEST_OUT_BIT22: + case TEST_IN_BIT22: + return trace->overflow == 1 || + trace->nodelen != 0 || + trace->remlen != 0; case TEST_OUT_BIT0: case TEST_IN_BIT0: @@ -164,9 +293,9 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT7: case TEST_OUT_BIT11: case 
TEST_IN_BIT11: - return ioam6h->overflow || - ioam6h->nodelen != 1 || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != 0; case TEST_OUT_BIT8: case TEST_IN_BIT8: @@ -174,22 +303,145 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, case TEST_IN_BIT9: case TEST_OUT_BIT10: case TEST_IN_BIT10: - return ioam6h->overflow || - ioam6h->nodelen != 2 || - ioam6h->remlen; - - case TEST_OUT_BIT22: - case TEST_IN_BIT22: - return ioam6h->overflow || - ioam6h->nodelen || - ioam6h->remlen; + return trace->overflow == 1 || + trace->nodelen != 2 || + trace->remlen != 0; + + case TEST_OUT_SIZE4: + case TEST_OUT_SIZE8: + case TEST_OUT_SIZE12: + case TEST_OUT_SIZE16: + case TEST_OUT_SIZE20: + case TEST_OUT_SIZE24: + case TEST_OUT_SIZE28: + case TEST_OUT_SIZE32: + case TEST_OUT_SIZE36: + case TEST_OUT_SIZE40: + case TEST_OUT_SIZE44: + case TEST_OUT_SIZE48: + case TEST_OUT_SIZE52: + case TEST_OUT_SIZE56: + case TEST_OUT_SIZE60: + case TEST_OUT_SIZE64: + case TEST_OUT_SIZE68: + case TEST_OUT_SIZE72: + case TEST_OUT_SIZE76: + case TEST_OUT_SIZE80: + case TEST_OUT_SIZE84: + case TEST_OUT_SIZE88: + case TEST_OUT_SIZE92: + case TEST_OUT_SIZE96: + case TEST_OUT_SIZE100: + case TEST_OUT_SIZE104: + case TEST_OUT_SIZE108: + case TEST_OUT_SIZE112: + case TEST_OUT_SIZE116: + case TEST_OUT_SIZE120: + case TEST_OUT_SIZE124: + case TEST_OUT_SIZE128: + case TEST_OUT_SIZE132: + case TEST_OUT_SIZE136: + case TEST_OUT_SIZE140: + case TEST_OUT_SIZE144: + case TEST_OUT_SIZE148: + case TEST_OUT_SIZE152: + case TEST_OUT_SIZE156: + case TEST_OUT_SIZE160: + case TEST_OUT_SIZE164: + case TEST_OUT_SIZE168: + case TEST_OUT_SIZE172: + case TEST_OUT_SIZE176: + case TEST_OUT_SIZE180: + case TEST_OUT_SIZE184: + case TEST_OUT_SIZE188: + case TEST_OUT_SIZE192: + case TEST_OUT_SIZE196: + case TEST_OUT_SIZE200: + case TEST_OUT_SIZE204: + case TEST_OUT_SIZE208: + case TEST_OUT_SIZE212: + case TEST_OUT_SIZE216: + case TEST_OUT_SIZE220: + case 
TEST_OUT_SIZE224: + case TEST_OUT_SIZE228: + case TEST_OUT_SIZE232: + case TEST_OUT_SIZE236: + case TEST_OUT_SIZE240: + case TEST_OUT_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != trace_size / 4; + + case TEST_IN_SIZE4: + case TEST_IN_SIZE8: + case TEST_IN_SIZE12: + case TEST_IN_SIZE16: + case TEST_IN_SIZE20: + case TEST_IN_SIZE24: + case TEST_IN_SIZE28: + case TEST_IN_SIZE32: + case TEST_IN_SIZE36: + case TEST_IN_SIZE40: + case TEST_IN_SIZE44: + case TEST_IN_SIZE48: + case TEST_IN_SIZE52: + case TEST_IN_SIZE56: + case TEST_IN_SIZE60: + case TEST_IN_SIZE64: + case TEST_IN_SIZE68: + case TEST_IN_SIZE72: + case TEST_IN_SIZE76: + case TEST_IN_SIZE80: + case TEST_IN_SIZE84: + case TEST_IN_SIZE88: + case TEST_IN_SIZE92: + case TEST_IN_SIZE96: + case TEST_IN_SIZE100: + case TEST_IN_SIZE104: + case TEST_IN_SIZE108: + case TEST_IN_SIZE112: + case TEST_IN_SIZE116: + case TEST_IN_SIZE120: + case TEST_IN_SIZE124: + case TEST_IN_SIZE128: + case TEST_IN_SIZE132: + case TEST_IN_SIZE136: + case TEST_IN_SIZE140: + case TEST_IN_SIZE144: + case TEST_IN_SIZE148: + case TEST_IN_SIZE152: + case TEST_IN_SIZE156: + case TEST_IN_SIZE160: + case TEST_IN_SIZE164: + case TEST_IN_SIZE168: + case TEST_IN_SIZE172: + case TEST_IN_SIZE176: + case TEST_IN_SIZE180: + case TEST_IN_SIZE184: + case TEST_IN_SIZE188: + case TEST_IN_SIZE192: + case TEST_IN_SIZE196: + case TEST_IN_SIZE200: + case TEST_IN_SIZE204: + case TEST_IN_SIZE208: + case TEST_IN_SIZE212: + case TEST_IN_SIZE216: + case TEST_IN_SIZE220: + case TEST_IN_SIZE224: + case TEST_IN_SIZE228: + case TEST_IN_SIZE232: + case TEST_IN_SIZE236: + case TEST_IN_SIZE240: + case TEST_IN_SIZE244: + return trace->overflow == 1 || + trace->nodelen != 1 || + trace->remlen != (trace_size / 4) - trace->nodelen; case TEST_OUT_FULL_SUPP_TRACE: case TEST_IN_FULL_SUPP_TRACE: - case TEST_FWD_FULL_SUPP_TRACE: - return ioam6h->overflow || - ioam6h->nodelen != 15 || - ioam6h->remlen; + return trace->overflow == 1 || + 
trace->nodelen != 15 || + trace->remlen != 0; default: break; @@ -198,167 +450,137 @@ static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h, return 1; } -static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, - const struct ioam_config cnf) +static int check_data(struct ioam6_trace_hdr *trace, __u8 trace_size, + const struct ioam_config cnf, bool is_output) { - unsigned int len; + unsigned int len, i; __u8 aligned; __u64 raw64; __u32 raw32; + __u8 *p; - if (ioam6h->type.bit0) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit1) { - raw32 = __be32_to_cpu(*((__u32 *)*p)); - if (cnf.ingr_id != (raw32 >> 16) || - cnf.egr_id != (raw32 & 0xffff)) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit2) - *p += sizeof(__u32); - - if (ioam6h->type.bit3) - *p += sizeof(__u32); - - if (ioam6h->type.bit4) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit5) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data) - return 1; - *p += sizeof(__u32); - } - - if (ioam6h->type.bit6) - *p += sizeof(__u32); + if (trace->type.bit12 | trace->type.bit13 | trace->type.bit14 | + trace->type.bit15 | trace->type.bit16 | trace->type.bit17 | + trace->type.bit18 | trace->type.bit19 | trace->type.bit20 | + trace->type.bit21 | trace->type.bit23) + return 1; - if (ioam6h->type.bit7) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + for (i = 0; i < trace->remlen * 4; i++) { + if (trace->data[i] != 0) return 1; - *p += sizeof(__u32); } - if (ioam6h->type.bit8) { - raw64 = __be64_to_cpu(*((__u64 *)*p)); - if (cnf.hlim != (raw64 >> 56) || - cnf.wide != (raw64 & 0xffffffffffffff)) - return 1; - *p += sizeof(__u64); - } + if (trace->remlen * 4 == trace_size) + return 0; - if (ioam6h->type.bit9) { - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide) - return 1; - *p += 
sizeof(__u32); + p = trace->data + trace->remlen * 4; - if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide) + if (trace->type.bit0) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit10) { - if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide) + if (trace->type.bit1) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if (cnf.ingr_id != (raw32 >> 16) || + cnf.egr_id != (raw32 & 0xffff)) return 1; - *p += sizeof(__u64); + p += sizeof(__u32); } - if (ioam6h->type.bit11) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit2) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit12) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit3) { + raw32 = __be32_to_cpu(*((__u32 *)p)); + if ((is_output && raw32 != 0xffffffff) || + (!is_output && (raw32 == 0 || raw32 == 0xffffffff))) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit13) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit4) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit14) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit5) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ns_data) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit15) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit6) { + if (__be32_to_cpu(*((__u32 *)p)) == 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit16) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit7) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + 
p += sizeof(__u32); } - if (ioam6h->type.bit17) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit8) { + raw64 = __be64_to_cpu(*((__u64 *)p)); + if (cnf.hlim != (raw64 >> 56) || + cnf.wide != (raw64 & 0xffffffffffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit18) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit9) { + if (__be32_to_cpu(*((__u32 *)p)) != cnf.ingr_wide) return 1; - *p += sizeof(__u32); - } + p += sizeof(__u32); - if (ioam6h->type.bit19) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (__be32_to_cpu(*((__u32 *)p)) != cnf.egr_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit20) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit10) { + if (__be64_to_cpu(*((__u64 *)p)) != cnf.ns_wide) return 1; - *p += sizeof(__u32); + p += sizeof(__u64); } - if (ioam6h->type.bit21) { - if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff) + if (trace->type.bit11) { + if (__be32_to_cpu(*((__u32 *)p)) != 0xffffffff) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); } - if (ioam6h->type.bit22) { + if (trace->type.bit22) { len = cnf.sc_data ? strlen(cnf.sc_data) : 0; aligned = cnf.sc_data ? 
__ALIGN_KERNEL(len, 4) : 0; - raw32 = __be32_to_cpu(*((__u32 *)*p)); + raw32 = __be32_to_cpu(*((__u32 *)p)); if (aligned != (raw32 >> 24) * 4 || cnf.sc_id != (raw32 & 0xffffff)) return 1; - *p += sizeof(__u32); + p += sizeof(__u32); if (cnf.sc_data) { - if (strncmp((char *)*p, cnf.sc_data, len)) + if (strncmp((char *)p, cnf.sc_data, len)) return 1; - *p += len; + p += len; aligned -= len; while (aligned--) { - if (**p != '\0') + if (*p != '\0') return 1; - *p += sizeof(__u8); + p += sizeof(__u8); } } } @@ -366,151 +588,351 @@ static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h, return 0; } -static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h, - __u32 trace_type, __u16 ioam_ns) +static int check_ioam_trace(int tid, struct ioam6_trace_hdr *trace, + __u32 trace_type, __u8 trace_size, __u16 ioam_ns) { - __u8 *p; - - if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns)) + if (check_header(tid, trace, trace_type, trace_size, ioam_ns)) return 1; - p = ioam6h->data + ioam6h->remlen * 4; - - switch (tid) { - case TEST_OUT_BIT0: - case TEST_OUT_BIT1: - case TEST_OUT_BIT2: - case TEST_OUT_BIT3: - case TEST_OUT_BIT4: - case TEST_OUT_BIT5: - case TEST_OUT_BIT6: - case TEST_OUT_BIT7: - case TEST_OUT_BIT8: - case TEST_OUT_BIT9: - case TEST_OUT_BIT10: - case TEST_OUT_BIT11: - case TEST_OUT_BIT22: - case TEST_OUT_FULL_SUPP_TRACE: - return check_ioam6_data(&p, ioam6h, node1); - - case TEST_IN_BIT0: - case TEST_IN_BIT1: - case TEST_IN_BIT2: - case TEST_IN_BIT3: - case TEST_IN_BIT4: - case TEST_IN_BIT5: - case TEST_IN_BIT6: - case TEST_IN_BIT7: - case TEST_IN_BIT8: - case TEST_IN_BIT9: - case TEST_IN_BIT10: - case TEST_IN_BIT11: - case TEST_IN_BIT22: - case TEST_IN_FULL_SUPP_TRACE: - { - __u32 tmp32 = node2.egr_wide; - __u16 tmp16 = node2.egr_id; - int res; - - node2.egr_id = 0xffff; - node2.egr_wide = 0xffffffff; + if (tid > __TEST_OUT_MIN && tid < __TEST_OUT_MAX) + return check_data(trace, trace_size, node1, true); - res = 
check_ioam6_data(&p, ioam6h, node2); - - node2.egr_id = tmp16; - node2.egr_wide = tmp32; - - return res; - } - - case TEST_FWD_FULL_SUPP_TRACE: - if (check_ioam6_data(&p, ioam6h, node3)) - return 1; - if (check_ioam6_data(&p, ioam6h, node2)) - return 1; - return check_ioam6_data(&p, ioam6h, node1); - - default: - break; - } + if (tid > __TEST_IN_MIN && tid < __TEST_IN_MAX) + return check_data(trace, trace_size, node2, false); return 1; } static int str2id(const char *tname) { - if (!strcmp("out_undef_ns", tname)) + if (!strcmp("output_undef_ns", tname)) return TEST_OUT_UNDEF_NS; - if (!strcmp("out_no_room", tname)) + if (!strcmp("output_no_room", tname)) return TEST_OUT_NO_ROOM; - if (!strcmp("out_bit0", tname)) + if (!strcmp("output_no_room_oss", tname)) + return TEST_OUT_NO_ROOM_OSS; + if (!strcmp("output_bit0", tname)) return TEST_OUT_BIT0; - if (!strcmp("out_bit1", tname)) + if (!strcmp("output_bit1", tname)) return TEST_OUT_BIT1; - if (!strcmp("out_bit2", tname)) + if (!strcmp("output_bit2", tname)) return TEST_OUT_BIT2; - if (!strcmp("out_bit3", tname)) + if (!strcmp("output_bit3", tname)) return TEST_OUT_BIT3; - if (!strcmp("out_bit4", tname)) + if (!strcmp("output_bit4", tname)) return TEST_OUT_BIT4; - if (!strcmp("out_bit5", tname)) + if (!strcmp("output_bit5", tname)) return TEST_OUT_BIT5; - if (!strcmp("out_bit6", tname)) + if (!strcmp("output_bit6", tname)) return TEST_OUT_BIT6; - if (!strcmp("out_bit7", tname)) + if (!strcmp("output_bit7", tname)) return TEST_OUT_BIT7; - if (!strcmp("out_bit8", tname)) + if (!strcmp("output_bit8", tname)) return TEST_OUT_BIT8; - if (!strcmp("out_bit9", tname)) + if (!strcmp("output_bit9", tname)) return TEST_OUT_BIT9; - if (!strcmp("out_bit10", tname)) + if (!strcmp("output_bit10", tname)) return TEST_OUT_BIT10; - if (!strcmp("out_bit11", tname)) + if (!strcmp("output_bit11", tname)) return TEST_OUT_BIT11; - if (!strcmp("out_bit22", tname)) + if (!strcmp("output_bit22", tname)) return TEST_OUT_BIT22; - if 
(!strcmp("out_full_supp_trace", tname)) + if (!strcmp("output_size4", tname)) + return TEST_OUT_SIZE4; + if (!strcmp("output_size8", tname)) + return TEST_OUT_SIZE8; + if (!strcmp("output_size12", tname)) + return TEST_OUT_SIZE12; + if (!strcmp("output_size16", tname)) + return TEST_OUT_SIZE16; + if (!strcmp("output_size20", tname)) + return TEST_OUT_SIZE20; + if (!strcmp("output_size24", tname)) + return TEST_OUT_SIZE24; + if (!strcmp("output_size28", tname)) + return TEST_OUT_SIZE28; + if (!strcmp("output_size32", tname)) + return TEST_OUT_SIZE32; + if (!strcmp("output_size36", tname)) + return TEST_OUT_SIZE36; + if (!strcmp("output_size40", tname)) + return TEST_OUT_SIZE40; + if (!strcmp("output_size44", tname)) + return TEST_OUT_SIZE44; + if (!strcmp("output_size48", tname)) + return TEST_OUT_SIZE48; + if (!strcmp("output_size52", tname)) + return TEST_OUT_SIZE52; + if (!strcmp("output_size56", tname)) + return TEST_OUT_SIZE56; + if (!strcmp("output_size60", tname)) + return TEST_OUT_SIZE60; + if (!strcmp("output_size64", tname)) + return TEST_OUT_SIZE64; + if (!strcmp("output_size68", tname)) + return TEST_OUT_SIZE68; + if (!strcmp("output_size72", tname)) + return TEST_OUT_SIZE72; + if (!strcmp("output_size76", tname)) + return TEST_OUT_SIZE76; + if (!strcmp("output_size80", tname)) + return TEST_OUT_SIZE80; + if (!strcmp("output_size84", tname)) + return TEST_OUT_SIZE84; + if (!strcmp("output_size88", tname)) + return TEST_OUT_SIZE88; + if (!strcmp("output_size92", tname)) + return TEST_OUT_SIZE92; + if (!strcmp("output_size96", tname)) + return TEST_OUT_SIZE96; + if (!strcmp("output_size100", tname)) + return TEST_OUT_SIZE100; + if (!strcmp("output_size104", tname)) + return TEST_OUT_SIZE104; + if (!strcmp("output_size108", tname)) + return TEST_OUT_SIZE108; + if (!strcmp("output_size112", tname)) + return TEST_OUT_SIZE112; + if (!strcmp("output_size116", tname)) + return TEST_OUT_SIZE116; + if (!strcmp("output_size120", tname)) + return TEST_OUT_SIZE120; + 
if (!strcmp("output_size124", tname)) + return TEST_OUT_SIZE124; + if (!strcmp("output_size128", tname)) + return TEST_OUT_SIZE128; + if (!strcmp("output_size132", tname)) + return TEST_OUT_SIZE132; + if (!strcmp("output_size136", tname)) + return TEST_OUT_SIZE136; + if (!strcmp("output_size140", tname)) + return TEST_OUT_SIZE140; + if (!strcmp("output_size144", tname)) + return TEST_OUT_SIZE144; + if (!strcmp("output_size148", tname)) + return TEST_OUT_SIZE148; + if (!strcmp("output_size152", tname)) + return TEST_OUT_SIZE152; + if (!strcmp("output_size156", tname)) + return TEST_OUT_SIZE156; + if (!strcmp("output_size160", tname)) + return TEST_OUT_SIZE160; + if (!strcmp("output_size164", tname)) + return TEST_OUT_SIZE164; + if (!strcmp("output_size168", tname)) + return TEST_OUT_SIZE168; + if (!strcmp("output_size172", tname)) + return TEST_OUT_SIZE172; + if (!strcmp("output_size176", tname)) + return TEST_OUT_SIZE176; + if (!strcmp("output_size180", tname)) + return TEST_OUT_SIZE180; + if (!strcmp("output_size184", tname)) + return TEST_OUT_SIZE184; + if (!strcmp("output_size188", tname)) + return TEST_OUT_SIZE188; + if (!strcmp("output_size192", tname)) + return TEST_OUT_SIZE192; + if (!strcmp("output_size196", tname)) + return TEST_OUT_SIZE196; + if (!strcmp("output_size200", tname)) + return TEST_OUT_SIZE200; + if (!strcmp("output_size204", tname)) + return TEST_OUT_SIZE204; + if (!strcmp("output_size208", tname)) + return TEST_OUT_SIZE208; + if (!strcmp("output_size212", tname)) + return TEST_OUT_SIZE212; + if (!strcmp("output_size216", tname)) + return TEST_OUT_SIZE216; + if (!strcmp("output_size220", tname)) + return TEST_OUT_SIZE220; + if (!strcmp("output_size224", tname)) + return TEST_OUT_SIZE224; + if (!strcmp("output_size228", tname)) + return TEST_OUT_SIZE228; + if (!strcmp("output_size232", tname)) + return TEST_OUT_SIZE232; + if (!strcmp("output_size236", tname)) + return TEST_OUT_SIZE236; + if (!strcmp("output_size240", tname)) + return 
TEST_OUT_SIZE240; + if (!strcmp("output_size244", tname)) + return TEST_OUT_SIZE244; + if (!strcmp("output_full_supp_trace", tname)) return TEST_OUT_FULL_SUPP_TRACE; - if (!strcmp("in_undef_ns", tname)) + if (!strcmp("input_undef_ns", tname)) return TEST_IN_UNDEF_NS; - if (!strcmp("in_no_room", tname)) + if (!strcmp("input_no_room", tname)) return TEST_IN_NO_ROOM; - if (!strcmp("in_oflag", tname)) + if (!strcmp("input_no_room_oss", tname)) + return TEST_IN_NO_ROOM_OSS; + if (!strcmp("input_disabled", tname)) + return TEST_IN_DISABLED; + if (!strcmp("input_oflag", tname)) return TEST_IN_OFLAG; - if (!strcmp("in_bit0", tname)) + if (!strcmp("input_bit0", tname)) return TEST_IN_BIT0; - if (!strcmp("in_bit1", tname)) + if (!strcmp("input_bit1", tname)) return TEST_IN_BIT1; - if (!strcmp("in_bit2", tname)) + if (!strcmp("input_bit2", tname)) return TEST_IN_BIT2; - if (!strcmp("in_bit3", tname)) + if (!strcmp("input_bit3", tname)) return TEST_IN_BIT3; - if (!strcmp("in_bit4", tname)) + if (!strcmp("input_bit4", tname)) return TEST_IN_BIT4; - if (!strcmp("in_bit5", tname)) + if (!strcmp("input_bit5", tname)) return TEST_IN_BIT5; - if (!strcmp("in_bit6", tname)) + if (!strcmp("input_bit6", tname)) return TEST_IN_BIT6; - if (!strcmp("in_bit7", tname)) + if (!strcmp("input_bit7", tname)) return TEST_IN_BIT7; - if (!strcmp("in_bit8", tname)) + if (!strcmp("input_bit8", tname)) return TEST_IN_BIT8; - if (!strcmp("in_bit9", tname)) + if (!strcmp("input_bit9", tname)) return TEST_IN_BIT9; - if (!strcmp("in_bit10", tname)) + if (!strcmp("input_bit10", tname)) return TEST_IN_BIT10; - if (!strcmp("in_bit11", tname)) + if (!strcmp("input_bit11", tname)) return TEST_IN_BIT11; - if (!strcmp("in_bit22", tname)) + if (!strcmp("input_bit22", tname)) return TEST_IN_BIT22; - if (!strcmp("in_full_supp_trace", tname)) + if (!strcmp("input_size4", tname)) + return TEST_IN_SIZE4; + if (!strcmp("input_size8", tname)) + return TEST_IN_SIZE8; + if (!strcmp("input_size12", tname)) + return 
TEST_IN_SIZE12; + if (!strcmp("input_size16", tname)) + return TEST_IN_SIZE16; + if (!strcmp("input_size20", tname)) + return TEST_IN_SIZE20; + if (!strcmp("input_size24", tname)) + return TEST_IN_SIZE24; + if (!strcmp("input_size28", tname)) + return TEST_IN_SIZE28; + if (!strcmp("input_size32", tname)) + return TEST_IN_SIZE32; + if (!strcmp("input_size36", tname)) + return TEST_IN_SIZE36; + if (!strcmp("input_size40", tname)) + return TEST_IN_SIZE40; + if (!strcmp("input_size44", tname)) + return TEST_IN_SIZE44; + if (!strcmp("input_size48", tname)) + return TEST_IN_SIZE48; + if (!strcmp("input_size52", tname)) + return TEST_IN_SIZE52; + if (!strcmp("input_size56", tname)) + return TEST_IN_SIZE56; + if (!strcmp("input_size60", tname)) + return TEST_IN_SIZE60; + if (!strcmp("input_size64", tname)) + return TEST_IN_SIZE64; + if (!strcmp("input_size68", tname)) + return TEST_IN_SIZE68; + if (!strcmp("input_size72", tname)) + return TEST_IN_SIZE72; + if (!strcmp("input_size76", tname)) + return TEST_IN_SIZE76; + if (!strcmp("input_size80", tname)) + return TEST_IN_SIZE80; + if (!strcmp("input_size84", tname)) + return TEST_IN_SIZE84; + if (!strcmp("input_size88", tname)) + return TEST_IN_SIZE88; + if (!strcmp("input_size92", tname)) + return TEST_IN_SIZE92; + if (!strcmp("input_size96", tname)) + return TEST_IN_SIZE96; + if (!strcmp("input_size100", tname)) + return TEST_IN_SIZE100; + if (!strcmp("input_size104", tname)) + return TEST_IN_SIZE104; + if (!strcmp("input_size108", tname)) + return TEST_IN_SIZE108; + if (!strcmp("input_size112", tname)) + return TEST_IN_SIZE112; + if (!strcmp("input_size116", tname)) + return TEST_IN_SIZE116; + if (!strcmp("input_size120", tname)) + return TEST_IN_SIZE120; + if (!strcmp("input_size124", tname)) + return TEST_IN_SIZE124; + if (!strcmp("input_size128", tname)) + return TEST_IN_SIZE128; + if (!strcmp("input_size132", tname)) + return TEST_IN_SIZE132; + if (!strcmp("input_size136", tname)) + return TEST_IN_SIZE136; + if 
(!strcmp("input_size140", tname)) + return TEST_IN_SIZE140; + if (!strcmp("input_size144", tname)) + return TEST_IN_SIZE144; + if (!strcmp("input_size148", tname)) + return TEST_IN_SIZE148; + if (!strcmp("input_size152", tname)) + return TEST_IN_SIZE152; + if (!strcmp("input_size156", tname)) + return TEST_IN_SIZE156; + if (!strcmp("input_size160", tname)) + return TEST_IN_SIZE160; + if (!strcmp("input_size164", tname)) + return TEST_IN_SIZE164; + if (!strcmp("input_size168", tname)) + return TEST_IN_SIZE168; + if (!strcmp("input_size172", tname)) + return TEST_IN_SIZE172; + if (!strcmp("input_size176", tname)) + return TEST_IN_SIZE176; + if (!strcmp("input_size180", tname)) + return TEST_IN_SIZE180; + if (!strcmp("input_size184", tname)) + return TEST_IN_SIZE184; + if (!strcmp("input_size188", tname)) + return TEST_IN_SIZE188; + if (!strcmp("input_size192", tname)) + return TEST_IN_SIZE192; + if (!strcmp("input_size196", tname)) + return TEST_IN_SIZE196; + if (!strcmp("input_size200", tname)) + return TEST_IN_SIZE200; + if (!strcmp("input_size204", tname)) + return TEST_IN_SIZE204; + if (!strcmp("input_size208", tname)) + return TEST_IN_SIZE208; + if (!strcmp("input_size212", tname)) + return TEST_IN_SIZE212; + if (!strcmp("input_size216", tname)) + return TEST_IN_SIZE216; + if (!strcmp("input_size220", tname)) + return TEST_IN_SIZE220; + if (!strcmp("input_size224", tname)) + return TEST_IN_SIZE224; + if (!strcmp("input_size228", tname)) + return TEST_IN_SIZE228; + if (!strcmp("input_size232", tname)) + return TEST_IN_SIZE232; + if (!strcmp("input_size236", tname)) + return TEST_IN_SIZE236; + if (!strcmp("input_size240", tname)) + return TEST_IN_SIZE240; + if (!strcmp("input_size244", tname)) + return TEST_IN_SIZE244; + if (!strcmp("input_full_supp_trace", tname)) return TEST_IN_FULL_SUPP_TRACE; - if (!strcmp("fwd_full_supp_trace", tname)) - return TEST_FWD_FULL_SUPP_TRACE; return -1; } +static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr 
*a2) +{ + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; +} + static int get_u32(__u32 *val, const char *arg, int base) { unsigned long res; @@ -555,119 +977,124 @@ static int get_u16(__u16 *val, const char *arg, int base) return 0; } -static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = { - [TEST_OUT_UNDEF_NS] = check_ioam_header, - [TEST_OUT_NO_ROOM] = check_ioam_header, - [TEST_OUT_BIT0] = check_ioam_header_and_data, - [TEST_OUT_BIT1] = check_ioam_header_and_data, - [TEST_OUT_BIT2] = check_ioam_header_and_data, - [TEST_OUT_BIT3] = check_ioam_header_and_data, - [TEST_OUT_BIT4] = check_ioam_header_and_data, - [TEST_OUT_BIT5] = check_ioam_header_and_data, - [TEST_OUT_BIT6] = check_ioam_header_and_data, - [TEST_OUT_BIT7] = check_ioam_header_and_data, - [TEST_OUT_BIT8] = check_ioam_header_and_data, - [TEST_OUT_BIT9] = check_ioam_header_and_data, - [TEST_OUT_BIT10] = check_ioam_header_and_data, - [TEST_OUT_BIT11] = check_ioam_header_and_data, - [TEST_OUT_BIT22] = check_ioam_header_and_data, - [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data, - [TEST_IN_UNDEF_NS] = check_ioam_header, - [TEST_IN_NO_ROOM] = check_ioam_header, - [TEST_IN_OFLAG] = check_ioam_header, - [TEST_IN_BIT0] = check_ioam_header_and_data, - [TEST_IN_BIT1] = check_ioam_header_and_data, - [TEST_IN_BIT2] = check_ioam_header_and_data, - [TEST_IN_BIT3] = check_ioam_header_and_data, - [TEST_IN_BIT4] = check_ioam_header_and_data, - [TEST_IN_BIT5] = check_ioam_header_and_data, - [TEST_IN_BIT6] = check_ioam_header_and_data, - [TEST_IN_BIT7] = check_ioam_header_and_data, - [TEST_IN_BIT8] = check_ioam_header_and_data, - [TEST_IN_BIT9] = check_ioam_header_and_data, - [TEST_IN_BIT10] = check_ioam_header_and_data, - [TEST_IN_BIT11] = check_ioam_header_and_data, - [TEST_IN_BIT22] = check_ioam_header_and_data, - [TEST_IN_FULL_SUPP_TRACE] = 
check_ioam_header_and_data, - [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data, -}; +static int get_u8(__u8 *val, const char *arg, int base) +{ + unsigned long res; + char *ptr; + + if (!arg || !*arg) + return -1; + res = strtoul(arg, &ptr, base); + + if (!ptr || ptr == arg || *ptr) + return -1; + + if (res == ULONG_MAX && errno == ERANGE) + return -1; + + if (res > 0xFFUL) + return -1; + + *val = res; + return 0; +} int main(int argc, char **argv) { - int fd, size, hoplen, tid, ret = 1, on = 1; - struct ioam6_hdr *opt; - struct cmsghdr *cmsg; - struct msghdr msg; - struct iovec iov; - __u8 buffer[512]; + __u8 buffer[512], *ptr, nexthdr, tr_size; + struct ioam6_trace_hdr *trace; + unsigned int hoplen, ret = 1; + struct ipv6_hopopt_hdr *hbh; + int fd, size, testname_id; + struct in6_addr src, dst; + struct ioam6_hdr *ioam6; + struct timeval timeout; + struct ipv6hdr *ipv6; __u32 tr_type; __u16 ioam_ns; - __u8 *ptr; - if (argc != 5) + if (argc != 9) goto out; - tid = str2id(argv[1]); - if (tid < 0 || !func[tid]) - goto out; + testname_id = str2id(argv[2]); - if (get_u32(&tr_type, argv[2], 16) || - get_u16(&ioam_ns, argv[3], 0)) + if (testname_id < 0 || + inet_pton(AF_INET6, argv[3], &src) != 1 || + inet_pton(AF_INET6, argv[4], &dst) != 1 || + get_u32(&tr_type, argv[5], 16) || + get_u8(&tr_size, argv[6], 0) || + get_u16(&ioam_ns, argv[7], 0)) goto out; - fd = socket(PF_INET6, SOCK_RAW, - !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6); + nexthdr = (!strcmp(argv[8], "encap") ? 
IPPROTO_IPV6 : IPPROTO_ICMPV6); + + hoplen = sizeof(*hbh); + hoplen += 2; // 2-byte padding for alignment + hoplen += sizeof(*ioam6); // IOAM option header + hoplen += sizeof(*trace); // IOAM trace header + hoplen += tr_size; // IOAM trace size + hoplen += (tr_size % 8); // optional padding + + fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6)); if (fd < 0) goto out; - setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on)); + if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE, + argv[1], strlen(argv[1]))) + goto close; - iov.iov_len = 1; - iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer))); - if (!iov.iov_base) + timeout.tv_sec = 1; + timeout.tv_usec = 0; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, + (const char *)&timeout, sizeof(timeout))) goto close; recv: - memset(&msg, 0, sizeof(msg)); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = buffer; - msg.msg_controllen = CMSG_SPACE(sizeof(buffer)); - - size = recvmsg(fd, &msg, 0); + size = recv(fd, buffer, sizeof(buffer), 0); if (size <= 0) goto close; - for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) { - if (cmsg->cmsg_level != IPPROTO_IPV6 || - cmsg->cmsg_type != IPV6_HOPOPTS || - cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr)) - continue; + ipv6 = (struct ipv6hdr *)buffer; + + /* Skip packets that do not have the expected src/dst address or that + * do not have a Hop-by-hop. + */ + if (!ipv6_addr_equal(&ipv6->saddr, &src) || + !ipv6_addr_equal(&ipv6->daddr, &dst) || + ipv6->nexthdr != IPPROTO_HOPOPTS) + goto recv; + + /* Check Hbh's Next Header and Size. */ + hbh = (struct ipv6_hopopt_hdr *)(buffer + sizeof(*ipv6)); + if (hbh->nexthdr != nexthdr || hbh->hdrlen != (hoplen >> 3) - 1) + goto close; - ptr = (__u8 *)CMSG_DATA(cmsg); + /* Check we have a 2-byte padding for alignment. 
*/ + ptr = (__u8 *)hbh + sizeof(*hbh); + if (ptr[0] != IPV6_TLV_PADN && ptr[1] != 0) + goto close; - hoplen = (ptr[1] + 1) << 3; - ptr += sizeof(struct ipv6_hopopt_hdr); + /* Check we now have the IOAM option. */ + ptr += 2; + if (ptr[0] != IPV6_TLV_IOAM) + goto close; - while (hoplen > 0) { - opt = (struct ioam6_hdr *)ptr; + /* Check its size and the IOAM option type. */ + ioam6 = (struct ioam6_hdr *)ptr; + if (ioam6->opt_len != sizeof(*ioam6) - 2 + sizeof(*trace) + tr_size || + ioam6->type != IOAM6_TYPE_PREALLOC) + goto close; - if (opt->opt_type == IPV6_TLV_IOAM && - opt->type == IOAM6_TYPE_PREALLOC) { - ptr += sizeof(*opt); - ret = func[tid](tid, - (struct ioam6_trace_hdr *)ptr, - tr_type, ioam_ns); - goto close; - } + trace = (struct ioam6_trace_hdr *)(ptr + sizeof(*ioam6)); - ptr += opt->opt_len + 2; - hoplen -= opt->opt_len + 2; - } - } + /* Check the trailing 4-byte padding (potentially). */ + ptr = (__u8 *)trace + sizeof(*trace) + tr_size; + if (tr_size % 8 && ptr[0] != IPV6_TLV_PADN && ptr[1] != 2 && + ptr[2] != 0 && ptr[3] != 0) + goto close; - goto recv; + /* Check the IOAM header and data. 
*/ + ret = check_ioam_trace(testname_id, trace, tr_type, tr_size, ioam_ns); close: - free(iov.iov_base); close(fd); out: return ret; diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c index 29451d2244b7..e6834a6cfc8f 100644 --- a/tools/testing/selftests/net/ip_local_port_range.c +++ b/tools/testing/selftests/net/ip_local_port_range.c @@ -10,7 +10,7 @@ #include <fcntl.h> #include <netinet/ip.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #ifndef IP_LOCAL_PORT_RANGE #define IP_LOCAL_PORT_RANGE 51 diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh index 6c6ad346eaa0..4ff746db1256 100755 --- a/tools/testing/selftests/net/ip_local_port_range.sh +++ b/tools/testing/selftests/net/ip_local_port_range.sh @@ -2,4 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 ./in_netns.sh \ - sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range' + sh -c 'sysctl -q -w net.mptcp.enabled=1 && \ + sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && \ + ./ip_local_port_range' diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c index be4a30a0d02a..0ccf484b1d9d 100644 --- a/tools/testing/selftests/net/ipsec.c +++ b/tools/testing/selftests/net/ipsec.c @@ -34,7 +34,7 @@ #include <time.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" #define printk(fmt, ...) 
\ ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__) @@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, attr->rta_len = RTA_LENGTH(size); attr->rta_type = rta_type; - memcpy(RTA_DATA(attr), payload, size); + if (payload) + memcpy(RTA_DATA(attr), payload, size); return 0; } diff --git a/tools/testing/selftests/net/ipv6_force_forwarding.sh b/tools/testing/selftests/net/ipv6_force_forwarding.sh new file mode 100755 index 000000000000..bf0243366caa --- /dev/null +++ b/tools/testing/selftests/net/ipv6_force_forwarding.sh @@ -0,0 +1,105 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test IPv6 force_forwarding interface property +# +# This test verifies that the force_forwarding property works correctly: +# - When global forwarding is disabled, packets are not forwarded normally +# - When force_forwarding is enabled on an interface, packets are forwarded +# regardless of the global forwarding setting + +source lib.sh + +cleanup() { + cleanup_ns $ns1 $ns2 $ns3 +} + +trap cleanup EXIT + +setup_test() { + # Create three namespaces: sender, router, receiver + setup_ns ns1 ns2 ns3 + + # Create veth pairs: ns1 <-> ns2 <-> ns3 + ip link add name veth12 type veth peer name veth21 + ip link add name veth23 type veth peer name veth32 + + # Move interfaces to namespaces + ip link set veth12 netns $ns1 + ip link set veth21 netns $ns2 + ip link set veth23 netns $ns2 + ip link set veth32 netns $ns3 + + # Configure interfaces + ip -n $ns1 addr add 2001:db8:1::1/64 dev veth12 nodad + ip -n $ns2 addr add 2001:db8:1::2/64 dev veth21 nodad + ip -n $ns2 addr add 2001:db8:2::1/64 dev veth23 nodad + ip -n $ns3 addr add 2001:db8:2::2/64 dev veth32 nodad + + # Bring up interfaces + ip -n $ns1 link set veth12 up + ip -n $ns2 link set veth21 up + ip -n $ns2 link set veth23 up + ip -n $ns3 link set veth32 up + + # Add routes + ip -n $ns1 route add 2001:db8:2::/64 via 2001:db8:1::2 + ip -n $ns3 route add 2001:db8:1::/64 via 2001:db8:2::1 + + 
# Disable global forwarding + ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=0 +} + +test_force_forwarding() { + local ret=0 + + echo "TEST: force_forwarding functionality" + + # Check if force_forwarding sysctl exists + if ! ip netns exec $ns2 test -f /proc/sys/net/ipv6/conf/veth21/force_forwarding; then + echo "SKIP: force_forwarding not available" + return $ksft_skip + fi + + # Test 1: Without force_forwarding, ping should fail + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=0 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=0 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "FAIL: ping succeeded when forwarding disabled" + ret=1 + else + echo "PASS: forwarding disabled correctly" + fi + + # Test 2: With force_forwarding enabled, ping should succeed + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth21.force_forwarding=1 + ip netns exec $ns2 sysctl -qw net.ipv6.conf.veth23.force_forwarding=1 + + if ip netns exec $ns1 ping -6 -c 1 -W 2 2001:db8:2::2 &>/dev/null; then + echo "PASS: force_forwarding enabled forwarding" + else + echo "FAIL: ping failed with force_forwarding enabled" + ret=1 + fi + + return $ret +} + +echo "IPv6 force_forwarding test" +echo "==========================" + +setup_test +test_force_forwarding +ret=$? 
+ +if [ $ret -eq 0 ]; then + echo "OK" + exit 0 +elif [ $ret -eq $ksft_skip ]; then + echo "SKIP" + exit $ksft_skip +else + echo "FAIL" + exit 1 +fi diff --git a/tools/testing/selftests/net/ipv6_fragmentation.c b/tools/testing/selftests/net/ipv6_fragmentation.c new file mode 100644 index 000000000000..672c9fe086a7 --- /dev/null +++ b/tools/testing/selftests/net/ipv6_fragmentation.c @@ -0,0 +1,114 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Author: Brett A C Sheffield <bacs@librecast.net> + * + * Kernel selftest for the IPv6 fragmentation regression which affected stable + * kernels: + * + * https://lore.kernel.org/stable/aElivdUXqd1OqgMY@karahi.gladserv.com + * + * Commit: a18dfa9925b9 ("ipv6: save dontfrag in cork") was backported to stable + * without some prerequisite commits. + * + * This caused a regression when sending IPv6 UDP packets by preventing + * fragmentation and instead returning -1 (EMSGSIZE). + * + * This selftest demonstrates the issue by sending an IPv6 UDP packet to + * localhost (::1) on the loopback interface from the autoconfigured link-local + * address. + * + * sendmsg(2) returns bytes sent correctly on a working kernel, and returns -1 + * (EMSGSIZE) when the regression is present. + * + * The regression was not present in the mainline kernel, but add this test to + * catch similar breakage in future. 
+ */ + +#define _GNU_SOURCE + +#include <error.h> +#include <net/if.h> +#include <netinet/in.h> +#include <sched.h> +#include <stdio.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <unistd.h> +#include "kselftest.h" + +#define MTU 1500 +#define LARGER_THAN_MTU 8192 + +static void setup(void) +{ + struct ifreq ifr = { + .ifr_name = "lo" + }; + int ctl; + + /* we need to set MTU, so do this in a namespace to play nicely */ + if (unshare(CLONE_NEWNET) == -1) + error(KSFT_FAIL, errno, "unshare"); + + ctl = socket(AF_LOCAL, SOCK_STREAM, 0); + if (ctl == -1) + error(KSFT_FAIL, errno, "socket"); + + /* ensure MTU is smaller than what we plan to send */ + ifr.ifr_mtu = MTU; + if (ioctl(ctl, SIOCSIFMTU, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: set MTU"); + + /* bring up interface */ + if (ioctl(ctl, SIOCGIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl SIOCGIFFLAGS"); + ifr.ifr_flags = ifr.ifr_flags | IFF_UP; + if (ioctl(ctl, SIOCSIFFLAGS, &ifr) == -1) + error(KSFT_FAIL, errno, "ioctl: bring interface up"); + + if (close(ctl) == -1) + error(KSFT_FAIL, errno, "close"); +} + +int main(void) +{ + struct in6_addr addr = { + .s6_addr[15] = 0x01, /* ::1 */ + }; + struct sockaddr_in6 sa = { + .sin6_family = AF_INET6, + .sin6_addr = addr, + .sin6_port = htons(9) /* port 9/udp (DISCARD) */ + }; + static char buf[LARGER_THAN_MTU] = {0}; + struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) }; + struct msghdr msg = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_name = (struct sockaddr *)&sa, + .msg_namelen = sizeof(sa), + }; + ssize_t rc; + int s; + + printf("Testing IPv6 fragmentation\n"); + setup(); + s = socket(AF_INET6, SOCK_DGRAM, 0); +send_again: + rc = sendmsg(s, &msg, 0); + if (rc == -1) { + /* if interface wasn't ready, try again */ + if (errno == EADDRNOTAVAIL) { + usleep(1000); + goto send_again; + } + error(KSFT_FAIL, errno, "sendmsg"); + } else if (rc != LARGER_THAN_MTU) { + error(KSFT_FAIL, errno, "sendmsg returned %zi, expected %i", + 
rc, LARGER_THAN_MTU); + } + printf("[PASS] sendmsg() returned %zi\n", rc); + if (close(s) == -1) + error(KSFT_FAIL, errno, "close"); + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh new file mode 100755 index 000000000000..c6866e42f95c --- /dev/null +++ b/tools/testing/selftests/net/ipv6_route_update_soft_lockup.sh @@ -0,0 +1,261 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Testing for potential kernel soft lockup during IPv6 routing table +# refresh under heavy outgoing IPv6 traffic. If a kernel soft lockup +# occurs, a kernel panic will be triggered to prevent associated issues. +# +# +# Test Environment Layout +# +# ┌----------------┐ ┌----------------┐ +# | SOURCE_NS | | SINK_NS | +# | NAMESPACE | | NAMESPACE | +# |(iperf3 clients)| |(iperf3 servers)| +# | | | | +# | | | | +# | ┌-----------| nexthops |---------┐ | +# | |veth_source|<--------------------------------------->|veth_sink|<┐ | +# | └-----------|2001:0DB8:1::0:1/96 2001:0DB8:1::1:1/96 |---------┘ | | +# | | ^ 2001:0DB8:1::1:2/96 | | | +# | | . . | fwd | | +# | ┌---------┐ | . . | | | +# | | IPv6 | | . . | V | +# | | routing | | . 2001:0DB8:1::1:80/96| ┌-----┐ | +# | | table | | . | | lo | | +# | | nexthop | | . └--------┴-----┴-┘ +# | | update | | ............................> 2001:0DB8:2::1:1/128 +# | └-------- ┘ | +# └----------------┘ +# +# The test script sets up two network namespaces, source_ns and sink_ns, +# connected via a veth link. Within source_ns, it continuously updates the +# IPv6 routing table by flushing and inserting IPV6_NEXTHOP_ADDR_COUNT nexthop +# IPs destined for SINK_LOOPBACK_IP_ADDR in sink_ns. This refresh occurs at a +# rate of 1/ROUTING_TABLE_REFRESH_PERIOD per second for TEST_DURATION seconds. +# +# Simultaneously, multiple iperf3 clients within source_ns generate heavy +# outgoing IPv6 traffic. 
Each client is assigned a unique port number starting +# at 5001 and incrementing sequentially. Each client targets a unique iperf3 +# server running in sink_ns, connected to the SINK_LOOPBACK_IFACE interface +# using the same port number. +# +# The number of iperf3 servers and clients is set to half of the total +# available cores on each machine. +# +# NOTE: We have tested this script on machines with various CPU specifications, +# ranging from lower to higher performance as listed below. The test script +# effectively triggered a kernel soft lockup on machines running an unpatched +# kernel in under a minute: +# +# - 1x Intel Xeon E-2278G 8-Core Processor @ 3.40GHz +# - 1x Intel Xeon E-2378G Processor 8-Core @ 2.80GHz +# - 1x AMD EPYC 7401P 24-Core Processor @ 2.00GHz +# - 1x AMD EPYC 7402P 24-Core Processor @ 2.80GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 1x Ampere Altra Q80-30 80-Core Processor @ 3.00GHz +# - 2x Intel Xeon Gold 5120 14-Core Processor @ 2.20GHz +# - 2x Intel Xeon Silver 4214 24-Core Processor @ 2.20GHz +# - 1x AMD EPYC 7502P 32-Core @ 2.50GHz +# - 1x Intel Xeon Gold 6314U 32-Core Processor @ 2.30GHz +# - 2x Intel Xeon Gold 6338 32-Core Processor @ 2.00GHz +# +# On less performant machines, you may need to increase the TEST_DURATION +# parameter to enhance the likelihood of encountering a race condition leading +# to a kernel soft lockup and avoid a false negative result. +# +# NOTE: The test may not produce the expected result in virtualized +# environments (e.g., qemu) due to differences in timing and CPU handling, +# which can affect the conditions needed to trigger a soft lockup. 
+ +source lib.sh + +TEST_DURATION=300 +ROUTING_TABLE_REFRESH_PERIOD=0.01 + +IPERF3_BITRATE="300m" + + +IPV6_NEXTHOP_ADDR_COUNT="128" +IPV6_NEXTHOP_ADDR_MASK="96" +IPV6_NEXTHOP_PREFIX="2001:0DB8:1" + + +SOURCE_TEST_IFACE="veth_source" +SOURCE_TEST_IP_ADDR="2001:0DB8:1::0:1/96" + +SINK_TEST_IFACE="veth_sink" +# ${SINK_TEST_IFACE} is populated with the following range of IPv6 addresses: +# 2001:0DB8:1::1:1 to 2001:0DB8:1::1:${IPV6_NEXTHOP_ADDR_COUNT} +SINK_LOOPBACK_IFACE="lo" +SINK_LOOPBACK_IP_MASK="128" +SINK_LOOPBACK_IP_ADDR="2001:0DB8:2::1:1" + +nexthop_ip_list="" +termination_signal="" +kernel_softlokup_panic_prev_val="" + +terminate_ns_processes_by_pattern() { + local ns=$1 + local pattern=$2 + + for pid in $(ip netns pids ${ns}); do + [ -e /proc/$pid/cmdline ] && grep -qe "${pattern}" /proc/$pid/cmdline && kill -9 $pid + done +} + +cleanup() { + echo "info: cleaning up namespaces and terminating all processes within them..." + + + # Terminate iperf3 instances running in the source_ns. To avoid race + # conditions, first iterate over the PIDs and terminate those + # associated with the bash shells running the + # `while true; do iperf3 -c ...; done` loops. In a second iteration, + # terminate the individual `iperf3 -c ...` instances. + terminate_ns_processes_by_pattern ${source_ns} while + terminate_ns_processes_by_pattern ${source_ns} iperf3 + + # Repeat the same process for sink_ns + terminate_ns_processes_by_pattern ${sink_ns} while + terminate_ns_processes_by_pattern ${sink_ns} iperf3 + + # Check if any iperf3 instances are still running. This could happen + # if a core has entered an infinite loop and the timeout for detecting + # the soft lockup has not expired, but either the test interval has + # already elapsed or the test was terminated manually (e.g., with ^C) + for pid in $(ip netns pids ${source_ns}); do + if [ -e /proc/$pid/cmdline ] && grep -qe 'iperf3' /proc/$pid/cmdline; then + echo "FAIL: unable to terminate some iperf3 instances. 
Soft lockup is underway. A kernel panic is on the way!" + exit ${ksft_fail} + fi + done + + if [ "$termination_signal" == "SIGINT" ]; then + echo "SKIP: Termination due to ^C (SIGINT)" + elif [ "$termination_signal" == "SIGALRM" ]; then + echo "PASS: No kernel soft lockup occurred during this ${TEST_DURATION} second test" + fi + + cleanup_ns ${source_ns} ${sink_ns} + + sysctl -qw kernel.softlockup_panic=${kernel_softlokup_panic_prev_val} +} + +setup_prepare() { + setup_ns source_ns sink_ns + + ip -n ${source_ns} link add name ${SOURCE_TEST_IFACE} type veth peer name ${SINK_TEST_IFACE} netns ${sink_ns} + + # Setting up the Source namespace + ip -n ${source_ns} addr add ${SOURCE_TEST_IP_ADDR} dev ${SOURCE_TEST_IFACE} + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} qlen 10000 + ip -n ${source_ns} link set dev ${SOURCE_TEST_IFACE} up + ip netns exec ${source_ns} sysctl -qw net.ipv6.fib_multipath_hash_policy=1 + + # Setting up the Sink namespace + ip -n ${sink_ns} addr add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} dev ${SINK_LOOPBACK_IFACE} + ip -n ${sink_ns} link set dev ${SINK_LOOPBACK_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_LOOPBACK_IFACE}.forwarding=1 + + ip -n ${sink_ns} link set ${SINK_TEST_IFACE} up + ip netns exec ${sink_ns} sysctl -qw net.ipv6.conf.${SINK_TEST_IFACE}.forwarding=1 + + + # Populate nexthop IPv6 addresses on the test interface in the sink_ns + echo "info: populating ${IPV6_NEXTHOP_ADDR_COUNT} IPv6 addresses on the ${SINK_TEST_IFACE} interface ..." 
+ for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + ip -n ${sink_ns} addr add ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" "${IP}")/${IPV6_NEXTHOP_ADDR_MASK} dev ${SINK_TEST_IFACE}; + done + + # Preparing list of nexthops + for IP in $(seq 1 ${IPV6_NEXTHOP_ADDR_COUNT}); do + nexthop_ip_list=$nexthop_ip_list" nexthop via ${IPV6_NEXTHOP_PREFIX}::$(printf "1:%x" $IP) dev ${SOURCE_TEST_IFACE} weight 1" + done +} + + +test_soft_lockup_during_routing_table_refresh() { + # Start num_of_iperf_servers iperf3 servers in the sink_ns namespace, + # each listening on ports starting at 5001 and incrementing + # sequentially. Since iperf3 instances may terminate unexpectedly, a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 servers in the sink_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 --bind ${SINK_LOOPBACK_IP_ADDR} -s -p $(printf '5%03d' ${i}) --rcv-timeout 200 &>/dev/null" + ip netns exec ${sink_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + # Wait for the iperf3 servers to be ready + for i in $(seq ${num_of_iperf_servers}); do + port=$(printf '5%03d' ${i}); + wait_local_port_listen ${sink_ns} ${port} tcp + done + + # Continuously refresh the routing table in the background within + # the source_ns namespace + ip netns exec ${source_ns} bash -c " + while \$(ip netns list | grep -q ${source_ns}); do + ip -6 route add ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK} ${nexthop_ip_list}; + sleep ${ROUTING_TABLE_REFRESH_PERIOD}; + ip -6 route delete ${SINK_LOOPBACK_IP_ADDR}/${SINK_LOOPBACK_IP_MASK}; + done &" + + # Start num_of_iperf_servers iperf3 clients in the source_ns namespace, + # each sending TCP traffic on sequential ports starting at 5001. 
+ # Since iperf3 instances may terminate unexpectedly (e.g., if the route + # to the server is deleted in the background during a route refresh), a + # while loop is used to automatically restart them in such cases. + echo "info: starting ${num_of_iperf_servers} iperf3 clients in the source_ns namespace ..." + for i in $(seq 1 ${num_of_iperf_servers}); do + cmd="iperf3 -c ${SINK_LOOPBACK_IP_ADDR} -p $(printf '5%03d' ${i}) --length 64 --bitrate ${IPERF3_BITRATE} -t 0 --connect-timeout 150 &>/dev/null" + ip netns exec ${source_ns} bash -c "while true; do ${cmd}; done &" &>/dev/null + done + + echo "info: IPv6 routing table is being updated at the rate of $(echo "1/${ROUTING_TABLE_REFRESH_PERIOD}" | bc)/s for ${TEST_DURATION} seconds ..." + echo "info: A kernel soft lockup, if detected, results in a kernel panic!" + + wait +} + +# Make sure 'iperf3' is installed, skip the test otherwise +if [ ! -x "$(command -v "iperf3")" ]; then + echo "SKIP: 'iperf3' is not installed. Skipping the test." + exit ${ksft_skip} +fi + +# Determine the number of cores on the machine +num_of_iperf_servers=$(( $(nproc)/2 )) + +# Check if we are running on a multi-core machine, skip the test otherwise +if [ "${num_of_iperf_servers}" -eq 0 ]; then + echo "SKIP: This test is not valid on a single core machine!" 
+ exit ${ksft_skip} +fi + +# Since the kernel soft lockup we're testing causes at least one core to enter +# an infinite loop, destabilizing the host and likely affecting subsequent +# tests, we trigger a kernel panic instead of reporting a failure and +# continuing +kernel_softlokup_panic_prev_val=$(sysctl -n kernel.softlockup_panic) +sysctl -qw kernel.softlockup_panic=1 + +handle_sigint() { + termination_signal="SIGINT" + cleanup + exit ${ksft_skip} +} + +handle_sigalrm() { + termination_signal="SIGALRM" + cleanup + exit ${ksft_pass} +} + +trap handle_sigint SIGINT +trap handle_sigalrm SIGALRM + +(sleep ${TEST_DURATION} && kill -s SIGALRM $$)& + +setup_prepare +test_soft_lockup_during_routing_table_refresh diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index d0219032f773..f448bafb3f20 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -1,11 +1,17 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 +net_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") +source "$net_dir/lib/sh/defer.sh" + ############################################################################## # Defines : "${WAIT_TIMEOUT:=20}" +# Whether to pause on after a failure. +: "${PAUSE_ON_FAIL:=no}" + BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms # Kselftest framework constants. @@ -17,6 +23,11 @@ ksft_skip=4 # namespace list created by setup_ns NS_LIST=() +# Exit status to return at the end. Set in case one of the tests fails. +EXIT_STATUS=0 +# Per-test return value. Clear at the beginning of each test. 
+RET=0 + ############################################################################## # Helpers @@ -32,7 +43,7 @@ __ksft_status_merge() weights[$i]=$((weight++)) done - if [[ ${weights[$a]} > ${weights[$b]} ]]; then + if [[ ${weights[$a]} -ge ${weights[$b]} ]]; then echo "$a" return 0 else @@ -125,6 +136,21 @@ slowwait_for_counter() slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +# Check for existence of tools which are built as part of selftests +# but may also already exist in $PATH +check_gen_prog() +{ + local prog_name=$1; shift + + if ! which $prog_name >/dev/null 2>/dev/null; then + PATH=$PWD:$PATH + if ! which $prog_name >/dev/null; then + echo "'$prog_name' command not found; skipping tests" + exit $ksft_skip + fi + fi +} + remove_ns_list() { local item=$1 @@ -146,6 +172,7 @@ cleanup_ns() for ns in "$@"; do [ -z "${ns}" ] && continue + ip netns pids "${ns}" 2> /dev/null | xargs -r kill || true ip netns delete "${ns}" &> /dev/null || true if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then echo "Warn: Failed to remove namespace $ns" @@ -190,11 +217,61 @@ setup_ns() return $ksft_skip fi ip -n "${!ns_name}" link set lo up + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.all.rp_filter=0 + ip netns exec "${!ns_name}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ns_list+=("${!ns_name}") done NS_LIST+=("${ns_list[@]}") } +# Create netdevsim with given id and net namespace. 
+create_netdevsim() { + local id="$1" + local ns="$2" + + modprobe netdevsim &> /dev/null + udevadm settle + + echo "$id 1" | ip netns exec $ns tee /sys/bus/netdevsim/new_device >/dev/null + local dev=$(ip netns exec $ns ls /sys/bus/netdevsim/devices/netdevsim$id/net) + ip -netns $ns link set dev $dev name nsim$id + ip -netns $ns link set dev nsim$id up + + echo nsim$id +} + +create_netdevsim_port() { + local nsim_id="$1" + local ns="$2" + local port_id="$3" + local perm_addr="$4" + local orig_dev + local new_dev + local nsim_path + + nsim_path="/sys/bus/netdevsim/devices/netdevsim$nsim_id" + + echo "$port_id $perm_addr" | ip netns exec "$ns" tee "$nsim_path"/new_port > /dev/null || return 1 + + orig_dev=$(ip netns exec "$ns" find "$nsim_path"/net/ -maxdepth 1 -name 'e*' | tail -n 1) + orig_dev=$(basename "$orig_dev") + new_dev="nsim${nsim_id}p$port_id" + + ip -netns "$ns" link set dev "$orig_dev" name "$new_dev" + ip -netns "$ns" link set dev "$new_dev" up + + echo "$new_dev" +} + +# Remove netdevsim with given id. 
+cleanup_netdevsim() { + local id="$1" + + if [ -d "/sys/bus/netdevsim/devices/netdevsim$id/net" ]; then + echo "$id" > /sys/bus/netdevsim/del_device + fi +} + tc_rule_stats_get() { local dev=$1; shift @@ -217,3 +294,378 @@ tc_rule_handle_stats_get() | jq ".[] | select(.options.handle == $handle) | \ .options.actions[0].stats$selector" } + +# attach a qdisc with two children match/no-match and a flower filter to match +tc_set_flower_counter() { + local -r ns=$1 + local -r ipver=$2 + local -r dev=$3 + local -r flower_expr=$4 + + tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \ + priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 + + tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo + tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo + + tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \ + flower $flower_expr classid 1:2 +} + +tc_get_flower_counter() { + local -r ns=$1 + local -r dev=$2 + + tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets +} + +ret_set_ksft_status() +{ + local ksft_status=$1; shift + local msg=$1; shift + + RET=$(ksft_status_merge $RET $ksft_status) + if (( $? 
)); then + retmsg=$msg + fi +} + +log_test_result() +{ + local test_name=$1; shift + local opt_str=$1; shift + local result=$1; shift + local retmsg=$1 + + printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result" + if [[ $retmsg ]]; then + printf "\t%s\n" "$retmsg" + fi +} + +pause_on_fail() +{ + if [[ $PAUSE_ON_FAIL == yes ]]; then + echo "Hit enter to continue, 'q' to quit" + read a + [[ $a == q ]] && exit 1 + fi +} + +handle_test_result_pass() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" " OK " +} + +handle_test_result_fail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" FAIL "$retmsg" + pause_on_fail +} + +handle_test_result_xfail() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" XFAIL "$retmsg" + pause_on_fail +} + +handle_test_result_skip() +{ + local test_name=$1; shift + local opt_str=$1; shift + + log_test_result "$test_name" "$opt_str" SKIP "$retmsg" +} + +log_test() +{ + local test_name=$1 + local opt_str=$2 + + if [[ $# -eq 2 ]]; then + opt_str="($opt_str)" + fi + + if ((RET == ksft_pass)); then + handle_test_result_pass "$test_name" "$opt_str" + elif ((RET == ksft_xfail)); then + handle_test_result_xfail "$test_name" "$opt_str" + elif ((RET == ksft_skip)); then + handle_test_result_skip "$test_name" "$opt_str" + else + handle_test_result_fail "$test_name" "$opt_str" + fi + + EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET) + return $RET +} + +log_test_skip() +{ + RET=$ksft_skip retmsg= log_test "$@" +} + +log_test_xfail() +{ + RET=$ksft_xfail retmsg= log_test "$@" +} + +log_info() +{ + local msg=$1 + + echo "INFO: $msg" +} + +tests_run() +{ + local current_test + + for current_test in ${TESTS:-$ALL_TESTS}; do + in_defer_scope \ + $current_test + done +} + +# Whether FAILs should be interpreted as XFAILs. Internal. 
+FAIL_TO_XFAIL= + +check_err() +{ + local err=$1 + local msg=$2 + + if ((err)); then + if [[ $FAIL_TO_XFAIL = yes ]]; then + ret_set_ksft_status $ksft_xfail "$msg" + else + ret_set_ksft_status $ksft_fail "$msg" + fi + fi +} + +check_fail() +{ + local err=$1 + local msg=$2 + + check_err $((!err)) "$msg" +} + +check_err_fail() +{ + local should_fail=$1; shift + local err=$1; shift + local what=$1; shift + + if ((should_fail)); then + check_fail $err "$what succeeded, but should have failed" + else + check_err $err "$what failed" + fi +} + +xfail() +{ + FAIL_TO_XFAIL=yes "$@" +} + +xfail_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW = yes ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +omit_on_slow() +{ + if [[ $KSFT_MACHINE_SLOW != yes ]]; then + "$@" + fi +} + +xfail_on_veth() +{ + local dev=$1; shift + local kind + + kind=$(ip -j -d link show dev $dev | + jq -r '.[].linkinfo.info_kind') + if [[ $kind = veth ]]; then + FAIL_TO_XFAIL=yes "$@" + else + "$@" + fi +} + +mac_get() +{ + local if_name=$1 + + ip -j link show dev $if_name | jq -r '.[]["address"]' +} + +kill_process() +{ + local pid=$1; shift + + # Suppress noise from killing the process. + { kill $pid && wait $pid; } 2>/dev/null +} + +check_command() +{ + local cmd=$1; shift + + if [[ ! -x "$(command -v "$cmd")" ]]; then + log_test_skip "$cmd not installed" + return $EXIT_STATUS + fi +} + +require_command() +{ + local cmd=$1; shift + + if ! 
check_command "$cmd"; then + exit $EXIT_STATUS + fi +} + +adf_ip_link_add() +{ + local name=$1; shift + + ip link add name "$name" "$@" && \ + defer ip link del dev "$name" +} + +adf_ip_link_set_master() +{ + local member=$1; shift + local master=$1; shift + + ip link set dev "$member" master "$master" && \ + defer ip link set dev "$member" nomaster +} + +adf_ip_link_set_addr() +{ + local name=$1; shift + local addr=$1; shift + + local old_addr=$(mac_get "$name") + ip link set dev "$name" address "$addr" && \ + defer ip link set dev "$name" address "$old_addr" +} + +ip_link_has_flag() +{ + local name=$1; shift + local flag=$1; shift + + local state=$(ip -j link show "$name" | + jq --arg flag "$flag" 'any(.[].flags.[]; . == $flag)') + [[ $state == true ]] +} + +ip_link_is_up() +{ + ip_link_has_flag "$1" UP +} + +adf_ip_link_set_up() +{ + local name=$1; shift + + if ! ip_link_is_up "$name"; then + ip link set dev "$name" up && \ + defer ip link set dev "$name" down + fi +} + +adf_ip_link_set_down() +{ + local name=$1; shift + + if ip_link_is_up "$name"; then + ip link set dev "$name" down && \ + defer ip link set dev "$name" up + fi +} + +adf_ip_addr_add() +{ + local name=$1; shift + + ip addr add dev "$name" "$@" && \ + defer ip addr del dev "$name" "$@" +} + +adf_ip_route_add() +{ + ip route add "$@" && \ + defer ip route del "$@" +} + +adf_bridge_vlan_add() +{ + bridge vlan add "$@" && \ + defer bridge vlan del "$@" +} + +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + local protocol="${3}" + local pattern + local i + + pattern=":$(printf "%04X" "${port}") " + + # for tcp protocol additionally check the socket state + [ ${protocol} = "tcp" ] && pattern="${pattern}0A" + for i in $(seq 10); do + if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ + /proc/net/"${protocol}"* | grep -q "${pattern}"; then + break + fi + sleep 0.1 + done +} + +cmd_jq() +{ + local cmd=$1 + local jq_exp=$2 + local jq_opts=$3 + local ret + local output + + 
output="$($cmd)" + # it the command fails, return error right away + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + output=$(echo $output | jq -r $jq_opts "$jq_exp") + ret=$? + if [[ $ret -ne 0 ]]; then + return $ret + fi + echo $output + # return success only in case of non-empty output + [ ! -z "$output" ] +} diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore index 1ebc6187f421..bbc97d6bf556 100644 --- a/tools/testing/selftests/net/lib/.gitignore +++ b/tools/testing/selftests/net/lib/.gitignore @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only csum +xdp_helper diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile index 82c3264b115e..5339f56329e1 100644 --- a/tools/testing/selftests/net/lib/Makefile +++ b/tools/testing/selftests/net/lib/Makefile @@ -1,15 +1,24 @@ # SPDX-License-Identifier: GPL-2.0 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES) # Additional include paths needed by kselftest.h CFLAGS += -I../../ -TEST_FILES := ../../../../../Documentation/netlink/specs -TEST_FILES += ../../../../net/ynl +TEST_FILES := \ + ../../../../net/ynl \ + ../../../../../Documentation/netlink/specs \ + ksft_setup_loopback.sh \ +# end of TEST_FILES -TEST_GEN_FILES += csum +TEST_GEN_FILES := \ + $(patsubst %.c,%.o,$(wildcard *.bpf.c)) \ + csum \ + xdp_helper \ +# end of TEST_GEN_FILES -TEST_INCLUDES := $(wildcard py/*.py) +TEST_INCLUDES := $(wildcard py/*.py sh/*.sh) include ../../lib.mk + +include ../bpf.mk diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c index b9f3fc3c3426..27437590eeb5 100644 --- a/tools/testing/selftests/net/lib/csum.c +++ b/tools/testing/selftests/net/lib/csum.c @@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len) { struct iphdr *iph = nh; uint16_t proto = cfg_encap ? 
IPPROTO_UDP : cfg_proto; + uint16_t ip_len; if (len < sizeof(*iph) || iph->protocol != proto) return -1; + ip_len = ntohs(iph->tot_len); + if (ip_len > len || ip_len < sizeof(*iph)) + return -1; + + len = ip_len; iph_addr_p = &iph->saddr; if (proto == IPPROTO_TCP) return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph)); @@ -669,16 +675,20 @@ static int recv_verify_packet_ipv6(void *nh, int len) { struct ipv6hdr *ip6h = nh; uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto; + uint16_t payload_len; if (len < sizeof(*ip6h) || ip6h->nexthdr != proto) return -1; - iph_addr_p = &ip6h->saddr; + payload_len = ntohs(ip6h->payload_len); + if (payload_len > len - sizeof(*ip6h)) + return -1; + iph_addr_p = &ip6h->saddr; if (proto == IPPROTO_TCP) - return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_tcp(ip6h + 1, payload_len); else - return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h)); + return recv_verify_packet_udp(ip6h + 1, payload_len); } /* return whether auxdata includes TP_STATUS_CSUM_VALID */ diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h new file mode 100644 index 000000000000..17dc34a612c6 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(__NET_KSFT_H__) +#define __NET_KSFT_H__ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +static inline void ksft_ready(void) +{ + const char msg[7] = "ready\n"; + char *env_str; + int fd; + + env_str = getenv("KSFT_READY_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n", + env_str); + return; + } + } else { + fd = STDOUT_FILENO; + } + + write(fd, msg, sizeof(msg)); + if (fd != STDOUT_FILENO) + close(fd); +} + +static inline void ksft_wait(void) +{ + char *env_str; + char byte; + int fd; + + env_str = getenv("KSFT_WAIT_FD"); + if (env_str) { + fd = atoi(env_str); + if (!fd) { + 
fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n", + env_str); + return; + } + } else { + /* Not running in KSFT env, wait for input from STDIN instead */ + fd = STDIN_FILENO; + } + + read(fd, &byte, sizeof(byte)); + if (fd != STDIN_FILENO) + close(fd); +} + +#endif diff --git a/tools/testing/selftests/net/lib/ksft_setup_loopback.sh b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh new file mode 100755 index 000000000000..3defbb1919c5 --- /dev/null +++ b/tools/testing/selftests/net/lib/ksft_setup_loopback.sh @@ -0,0 +1,111 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Setup script for running ksft tests over a real interface in loopback mode. +# This scripts replaces the historical setup_loopback.sh. It puts +# a (presumably) real hardware interface into loopback mode, creates macvlan +# interfaces on top and places them in a network namespace for isolation. +# +# NETIF env variable must be exported to indicate the real target device. +# Note that the test will override NETIF with one of the macvlans, the +# actual ksft test will only see the macvlans. +# +# Example use: +# export NETIF=eth0 +# ./net/lib/ksft_setup_loopback.sh ./drivers/net/gro.py + +if [ -z "$NETIF" ]; then + echo "Error: NETIF variable not set" + exit 1 +fi +if ! 
[ -d "/sys/class/net/$NETIF" ]; then + echo "Error: Can't find $NETIF, invalid netdevice" + exit 1 +fi + +# Save original settings for cleanup +readonly FLUSH_PATH="/sys/class/net/${NETIF}/gro_flush_timeout" +readonly IRQ_PATH="/sys/class/net/${NETIF}/napi_defer_hard_irqs" +FLUSH_TIMEOUT="$(< "${FLUSH_PATH}")" +readonly FLUSH_TIMEOUT +HARD_IRQS="$(< "${IRQ_PATH}")" +readonly HARD_IRQS + +SERVER_NS=$(mktemp -u server-XXXXXXXX) +readonly SERVER_NS +CLIENT_NS=$(mktemp -u client-XXXXXXXX) +readonly CLIENT_NS +readonly SERVER_MAC="aa:00:00:00:00:02" +readonly CLIENT_MAC="aa:00:00:00:00:01" + +# ksft expects addresses to communicate with remote +export LOCAL_V6=2001:db8:1::1 +export REMOTE_V6=2001:db8:1::2 + +cleanup() { + local exit_code=$? + + echo "Cleaning up..." + + # Remove macvlan interfaces and namespaces + ip -netns "${SERVER_NS}" link del dev server 2>/dev/null || true + ip netns del "${SERVER_NS}" 2>/dev/null || true + ip -netns "${CLIENT_NS}" link del dev client 2>/dev/null || true + ip netns del "${CLIENT_NS}" 2>/dev/null || true + + # Disable loopback + ethtool -K "${NETIF}" loopback off 2>/dev/null || true + sleep 1 + + echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" + echo "${HARD_IRQS}" >"${IRQ_PATH}" + + exit $exit_code +} + +trap cleanup EXIT INT TERM + +# Enable loopback mode +echo "Enabling loopback on ${NETIF}..." +ethtool -K "${NETIF}" loopback on || { + echo "Failed to enable loopback mode" + exit 1 +} +# The interface may need time to get carrier back, but selftests +# will wait for carrier, so no need to wait / sleep here. 
+ +# Use timer on host to trigger the network stack +# Also disable device interrupt to not depend on NIC interrupt +# Reduce test flakiness caused by unexpected interrupts +echo 100000 >"${FLUSH_PATH}" +echo 50 >"${IRQ_PATH}" + +# Create server namespace with macvlan +ip netns add "${SERVER_NS}" +ip link add link "${NETIF}" dev server address "${SERVER_MAC}" type macvlan +ip link set dev server netns "${SERVER_NS}" +ip -netns "${SERVER_NS}" link set dev server up +ip -netns "${SERVER_NS}" addr add $LOCAL_V6/64 dev server +ip -netns "${SERVER_NS}" link set dev lo up + +# Create client namespace with macvlan +ip netns add "${CLIENT_NS}" +ip link add link "${NETIF}" dev client address "${CLIENT_MAC}" type macvlan +ip link set dev client netns "${CLIENT_NS}" +ip -netns "${CLIENT_NS}" link set dev client up +ip -netns "${CLIENT_NS}" addr add $REMOTE_V6/64 dev client +ip -netns "${CLIENT_NS}" link set dev lo up + +echo "Setup complete!" +echo " Device: ${NETIF}" +echo " Server NS: ${SERVER_NS}" +echo " Client NS: ${CLIENT_NS}" +echo "" + +# Setup environment variables for tests +export NETIF=server +export REMOTE_TYPE=netns +export REMOTE_ARGS="${CLIENT_NS}" + +# Run the command +ip netns exec "${SERVER_NS}" "$@" diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index b6d498d125fe..40f9ce307dd1 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -1,8 +1,33 @@ # SPDX-License-Identifier: GPL-2.0 +""" +Python selftest helpers for netdev. 
+""" + from .consts import KSRC -from .ksft import * -from .netns import NetNS -from .nsim import * -from .utils import * -from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily +from .ksft import KsftFailEx, KsftSkipEx, KsftXfailEx, ksft_pr, ksft_eq, \ + ksft_ne, ksft_true, ksft_not_none, ksft_in, ksft_not_in, ksft_is, \ + ksft_ge, ksft_gt, ksft_lt, ksft_raises, ksft_busy_wait, \ + ktap_result, ksft_disruptive, ksft_setup, ksft_run, ksft_exit, \ + ksft_variants, KsftNamedVariant +from .netns import NetNS, NetNSEnter +from .nsim import NetdevSim, NetdevSimDev +from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ + bpftool, ip, ethtool, bpftrace, rand_port, wait_port_listen, wait_file +from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily +from .ynl import NetshaperFamily, DevlinkFamily, PSPFamily + +__all__ = ["KSRC", + "KsftFailEx", "KsftSkipEx", "KsftXfailEx", "ksft_pr", "ksft_eq", + "ksft_ne", "ksft_true", "ksft_not_none", "ksft_in", "ksft_not_in", + "ksft_is", "ksft_ge", "ksft_gt", "ksft_lt", "ksft_raises", + "ksft_busy_wait", "ktap_result", "ksft_disruptive", "ksft_setup", + "ksft_run", "ksft_exit", "ksft_variants", "KsftNamedVariant", + "NetNS", "NetNSEnter", + "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", + "bpftool", "ip", "ethtool", "bpftrace", "rand_port", + "wait_port_listen", "wait_file", + "NetdevSim", "NetdevSimDev", + "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", + "YnlFamily", "EthtoolFamily", "NetdevFamily", "RtnlFamily", + "RtnlAddrFamily"] diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index f26c20df9db4..531e7fa1b3ea 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -1,15 +1,18 @@ # SPDX-License-Identifier: GPL-2.0 -import builtins +import functools import inspect +import signal import sys import time import traceback +from 
collections import namedtuple from .consts import KSFT_MAIN_NAME from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True +KSFT_DISRUPTIVE = True class KsftFailEx(Exception): @@ -24,7 +27,12 @@ class KsftXfailEx(Exception): pass +class KsftTerminate(KeyboardInterrupt): + pass + + def ksft_pr(*objs, **kwargs): + kwargs["flush"] = True print("#", *objs, **kwargs) @@ -32,8 +40,18 @@ def _fail(*args): global KSFT_RESULT KSFT_RESULT = False - frame = inspect.stack()[2] - ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":") + stack = inspect.stack() + started = False + for frame in reversed(stack[2:]): + # Start printing from the test case function + if not started: + if frame.function == 'ksft_run': + started = True + continue + + ksft_pr("Check| At " + frame.filename + ", line " + str(frame.lineno) + + ", in " + frame.function + ":") + ksft_pr("Check| " + frame.code_context[0].strip()) ksft_pr(*args) @@ -43,21 +61,47 @@ def ksft_eq(a, b, comment=""): _fail("Check failed", a, "!=", b, comment) +def ksft_ne(a, b, comment=""): + global KSFT_RESULT + if a == b: + _fail("Check failed", a, "==", b, comment) + + def ksft_true(a, comment=""): if not a: _fail("Check failed", a, "does not eval to True", comment) +def ksft_not_none(a, comment=""): + if a is None: + _fail("Check failed", a, "is None", comment) + + def ksft_in(a, b, comment=""): if a not in b: _fail("Check failed", a, "not in", b, comment) +def ksft_not_in(a, b, comment=""): + if a in b: + _fail("Check failed", a, "in", b, comment) + + +def ksft_is(a, b, comment=""): + if a is not b: + _fail("Check failed", a, "is not", b, comment) + + def ksft_ge(a, b, comment=""): if a < b: _fail("Check failed", a, "<", b, comment) +def ksft_gt(a, b, comment=""): + if a <= b: + _fail("Check failed", a, "<=", b, comment) + + def ksft_lt(a, b, comment=""): if a >= b: _fail("Check failed", a, ">=", b, comment) @@ -92,7 +136,7 @@ def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""): 
time.sleep(sleep) -def ktap_result(ok, cnt=1, case="", comment=""): +def ktap_result(ok, cnt=1, case_name="", comment=""): global KSFT_RESULT_ALL KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok @@ -102,11 +146,11 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "ok " res += str(cnt) + " " res += KSFT_MAIN_NAME - if case: - res += "." + str(case.__name__) + if case_name: + res += "." + case_name if comment: res += " # " + comment - print(res) + print(res, flush=True) def ksft_flush_defer(): @@ -119,7 +163,7 @@ def ksft_flush_defer(): entry = global_defer_queue.pop() try: entry.exec_only() - except: + except Exception: ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") tb = traceback.format_exc() for line in tb.strip().split('\n'): @@ -127,9 +171,102 @@ def ksft_flush_defer(): KSFT_RESULT = False -def ksft_run(cases=None, globs=None, case_pfx=None, args=()): +KsftCaseFunction = namedtuple("KsftCaseFunction", + ['name', 'original_func', 'variants']) + + +def ksft_disruptive(func): + """ + Decorator that marks the test as disruptive (e.g. the test + that can down the interface). Disruptive tests can be skipped + by passing DISRUPTIVE=False environment variable. + """ + + @functools.wraps(func) + def wrapper(*args, **kwargs): + if not KSFT_DISRUPTIVE: + raise KsftSkipEx("marked as disruptive") + return func(*args, **kwargs) + return wrapper + + +class KsftNamedVariant: + """ Named string name + argument list tuple for @ksft_variants """ + + def __init__(self, name, *params): + self.params = params + self.name = name or "_".join([str(x) for x in self.params]) + + +def ksft_variants(params): + """ + Decorator defining the sets of inputs for a test. + The parameters will be included in the name of the resulting sub-case. + Parameters can be either single object, tuple or a KsftNamedVariant. + The argument can be a list or a generator. 
+ + Example: + + @ksft_variants([ + (1, "a"), + (2, "b"), + KsftNamedVariant("three", 3, "c"), + ]) + def my_case(cfg, a, b): + pass # ... + + ksft_run(cases=[my_case], args=(cfg, )) + + Will generate cases: + my_case.1_a + my_case.2_b + my_case.three + """ + + return lambda func: KsftCaseFunction(func.__name__, func, params) + + +def ksft_setup(env): + """ + Setup test framework global state from the environment. + """ + + def get_bool(env, name): + value = env.get(name, "").lower() + if value in ["yes", "true"]: + return True + if value in ["no", "false"]: + return False + try: + return bool(int(value)) + except Exception: + raise Exception(f"failed to parse {name}") + + if "DISRUPTIVE" in env: + global KSFT_DISRUPTIVE + KSFT_DISRUPTIVE = get_bool(env, "DISRUPTIVE") + + return env + + +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + +def _ksft_generate_test_cases(cases, globs, case_pfx, args): + """Generate a flat list of (func, args, name) tuples""" + cases = cases or [] + test_cases = [] + # If using the globs method find all relevant functions if globs and case_pfx: for key, value in globs.items(): if not callable(value): @@ -139,22 +276,47 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + for func in cases: + if isinstance(func, KsftCaseFunction): + # Parametrized test - create case for each param + for param in func.variants: + if not isinstance(param, KsftNamedVariant): + if not isinstance(param, tuple): + param = (param, ) + param = KsftNamedVariant(None, *param) + + test_cases.append((func.original_func, + (*args, *param.params), + func.name + "." 
+ param.name)) + else: + test_cases.append((func, args, func.__name__)) + + return test_cases + + +def ksft_run(cases=None, globs=None, case_pfx=None, args=()): + test_cases = _ksft_generate_test_cases(cases, globs, case_pfx, args) + + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} - print("KTAP version 1") - print("1.." + str(len(cases))) + print("TAP version 13", flush=True) + print("1.." + str(len(test_cases)), flush=True) global KSFT_RESULT cnt = 0 stop = False - for case in cases: + for func, args, name in test_cases: KSFT_RESULT = True cnt += 1 comment = "" cnt_key = "" try: - case(*args) + func(*args) except KsftSkipEx as e: comment = "SKIP " + str(e) cnt_key = 'skip' @@ -167,21 +329,37 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for line in tb.strip().split('\n'): ksft_pr("Exception|", line) if stop: - ksft_pr("Stopping tests due to KeyboardInterrupt.") + ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' - ksft_flush_defer() + try: + ksft_flush_defer() + except BaseException as e: + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Exception|", line) + if isinstance(e, KeyboardInterrupt): + ksft_pr() + ksft_pr("WARN: defer() interrupted, cleanup may be incomplete.") + ksft_pr(" Attempting to finish cleanup before exiting.") + ksft_pr(" Interrupt again to exit immediately.") + ksft_pr() + stop = True + # Flush was interrupted, try to finish the job best we can + ksft_flush_defer() if not cnt_key: cnt_key = 'pass' if KSFT_RESULT else 'fail' - ktap_result(KSFT_RESULT, cnt, case, comment=comment) + ktap_result(KSFT_RESULT, cnt, name, comment=comment) totals[cnt_key] += 1 if stop: break + signal.signal(signal.SIGTERM, prev_sigterm) + print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" ) diff --git 
a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py index ecff85f9074f..8e9317044eef 100644 --- a/tools/testing/selftests/net/lib/py/netns.py +++ b/tools/testing/selftests/net/lib/py/netns.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: GPL-2.0 from .utils import ip +import ctypes import random import string +libc = ctypes.cdll.LoadLibrary('libc.so.6') + class NetNS: def __init__(self, name=None): @@ -29,3 +32,18 @@ class NetNS: def __repr__(self): return f"NetNS({self.name})" + + +class NetNSEnter: + def __init__(self, ns_name): + self.ns_path = f"/run/netns/{ns_name}" + + def __enter__(self): + self.saved = open("/proc/thread-self/ns/net") + with open(self.ns_path) as ns_file: + libc.setns(ns_file.fileno(), 0) + return self + + def __exit__(self, exc_type, exc_value, traceback): + libc.setns(self.saved.fileno(), 0) + self.saved.close() diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py index f571a8b3139b..7c640ed64c0b 100644 --- a/tools/testing/selftests/net/lib/py/nsim.py +++ b/tools/testing/selftests/net/lib/py/nsim.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +import errno import json import os import random @@ -26,7 +27,7 @@ class NetdevSim: self.port_index = port_index self.ns = ns self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index) - ret = ip("-j link show dev %s" % ifname, ns=ns) + ret = ip("-d -j link show dev %s" % ifname, ns=ns) self.dev = json.loads(ret.stdout)[0] self.ifindex = self.dev["ifindex"] diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 72590c3f90f1..106ee1f2df86 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -1,33 +1,80 @@ # SPDX-License-Identifier: GPL-2.0 -import errno import json as _json -import random +import os import re +import select import socket import subprocess import time class 
CmdExitFailure(Exception): - pass + def __init__(self, msg, cmd_obj): + super().__init__(msg) + self.cmd = cmd_obj + + +def fd_read_timeout(fd, timeout): + rlist, _, _ = select.select([fd], [], [], timeout) + if rlist: + return os.read(fd, 1024) + raise TimeoutError("Timeout waiting for fd read") class cmd: - def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5): + """ + Execute a command on local or remote host. + + @shell defaults to false, and class will try to split @comm into a list + if it's a string with spaces. + + Use bkg() instead to run a command in the background. + """ + def __init__(self, comm, shell=None, fail=True, ns=None, background=False, + host=None, timeout=5, ksft_ready=None, ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm self.stdout = None self.stderr = None self.ret = None + self.ksft_term_fd = None self.comm = comm if host: self.proc = host.cmd(comm) else: + # If user doesn't explicitly request shell try to avoid it. + if shell is None and isinstance(comm, str) and ' ' in comm: + comm = comm.split() + + # ksft_wait lets us wait for the background process to fully start, + # we pass an FD to the child process, and wait for it to write back. + # Similarly term_fd tells child it's time to exit. 
+ pass_fds = [] + env = os.environ.copy() + if ksft_wait is not None: + wait_fd, self.ksft_term_fd = os.pipe() + pass_fds.append(wait_fd) + env["KSFT_WAIT_FD"] = str(wait_fd) + ksft_ready = True # ksft_wait implies ready + if ksft_ready is not None: + rfd, ready_fd = os.pipe() + pass_fds.append(ready_fd) + env["KSFT_READY_FD"] = str(ready_fd) + self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE, - stderr=subprocess.PIPE) + stderr=subprocess.PIPE, pass_fds=pass_fds, + env=env) + if ksft_wait is not None: + os.close(wait_fd) + if ksft_ready is not None: + os.close(ready_fd) + msg = fd_read_timeout(rfd, ksft_wait) + os.close(rfd) + if not msg: + raise Exception("Did not receive ready message") if not background: self.process(terminate=False, fail=fail, timeout=timeout) @@ -35,6 +82,8 @@ class cmd: if fail is None: fail = not terminate + if self.ksft_term_fd: + os.write(self.ksft_term_fd, b"1") if terminate: self.proc.terminate() stdout, stderr = self.proc.communicate(timeout) @@ -48,22 +97,48 @@ class cmd: if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" % - (self.proc.args, stdout, stderr)) + (self.proc.args, stdout, stderr), self) class bkg(cmd): - def __init__(self, comm, shell=True, fail=None, ns=None, host=None, - exit_wait=False): + """ + Run a command in the background. + + Examples usage: + + Run a command on remote host, and wait for it to finish. + This is usually paired with wait_port_listen() to make sure + the command has initialized: + + with bkg("socat ...", exit_wait=True, host=cfg.remote) as nc: + ... + + Run a command and expect it to let us know that it's ready + by writing to a special file descriptor passed via KSFT_READY_FD. 
+ Command will be terminated when we exit the context manager: + + with bkg("my_binary", ksft_wait=5): + """ + def __init__(self, comm, shell=None, fail=None, ns=None, host=None, + exit_wait=False, ksft_ready=None, ksft_wait=None): super().__init__(comm, background=True, - shell=shell, fail=fail, ns=ns, host=host) - self.terminate = not exit_wait + shell=shell, fail=fail, ns=ns, host=host, + ksft_ready=ksft_ready, ksft_wait=ksft_wait) + self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail + if shell and self.terminate: + print("# Warning: combining shell and terminate is risky!") + print("# SIGTERM may not reach the child on zsh/ksh!") + def __enter__(self): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + # Force termination on exception + terminate = self.terminate or (self._exit_wait and ex_type) + return self.process(terminate=terminate, fail=self.check_fail) global_defer_queue = [] @@ -71,8 +146,6 @@ global_defer_queue = [] class defer: def __init__(self, func, *args, **kwargs): - global global_defer_queue - if not callable(func): raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") @@ -111,6 +184,10 @@ def tool(name, args, json=None, ns=None, host=None): return cmd_obj +def bpftool(args, json=None, ns=None, host=None): + return tool('bpftool', args, json=json, ns=ns, host=host) + + def ip(args, json=None, ns=None, host=None): if ns: args = f'-netns {ns} ' + args @@ -121,20 +198,48 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) -def rand_port(): +def bpftrace(expr, json=None, ns=None, host=None, timeout=None): """ - Get a random unprivileged port, try to make sure it's not already used. + Run bpftrace and return map data (if json=True). 
+ The output of bpftrace is inconvenient, so the helper converts + to a dict indexed by map name, e.g.: + { + "@": { ... }, + "@map2": { ... }, + } """ - for _ in range(1000): - port = random.randint(10000, 65535) - try: - with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s: - s.bind(("", port)) - return port - except OSError as e: - if e.errno != errno.EADDRINUSE: - raise - raise Exception("Can't find any free unprivileged port") + cmd_arr = ['bpftrace'] + # Throw in --quiet if json, otherwise the output has two objects + if json: + cmd_arr += ['-f', 'json', '-q'] + if timeout: + expr += ' interval:s:' + str(timeout) + ' { exit(); }' + cmd_arr += ['-e', expr] + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False) + if json: + # bpftrace prints objects as lines + ret = {} + for l in cmd_obj.stdout.split('\n'): + if not l.strip(): + continue + one = _json.loads(l) + if one.get('type') != 'map': + continue + for k, v in one["data"].items(): + if k.startswith('@'): + k = k.lstrip('@') + ret[k] = v + return ret + return cmd_obj + + +def rand_port(stype=socket.SOCK_STREAM): + """ + Get a random unprivileged port. + """ + with socket.socket(socket.AF_INET6, stype) as s: + s.bind(("", 0)) + return s.getsockname()[1] def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): @@ -153,3 +258,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. 
+ """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 1ace58370c06..32c223e93b2c 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -13,14 +13,14 @@ try: SPEC_PATH = KSFT_DIR / "net/lib/specs" sys.path.append(tools_full_path.as_posix()) - from net.lib.ynl.lib import YnlFamily, NlError + from net.lib.ynl.pyynl.lib import YnlFamily, NlError else: # Running in tree tools_full_path = KSRC / "tools" SPEC_PATH = KSRC / "Documentation/netlink/specs" sys.path.append(tools_full_path.as_posix()) - from net.ynl.lib import YnlFamily, NlError + from net.ynl.pyynl.lib import YnlFamily, NlError except ModuleNotFoundError as e: ksft_pr("Failed importing `ynl` library from kernel sources") ksft_pr(str(e)) @@ -32,18 +32,37 @@ except ModuleNotFoundError as e: # Set schema='' to avoid jsonschema validation, it's slow # class EthtoolFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(), - schema='') + schema='', recv_size=recv_size) class RtnlFamily(YnlFamily): - def __init__(self): - super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), - schema='') + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(), + schema='', recv_size=recv_size) +class RtnlAddrFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(), + schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): - def __init__(self): + def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(), - schema='') + schema='', 
recv_size=recv_size) + +class NetshaperFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), + schema='', recv_size=recv_size) + +class DevlinkFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), + schema='', recv_size=recv_size) + +class PSPFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('psp.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/sh/defer.sh b/tools/testing/selftests/net/lib/sh/defer.sh new file mode 100644 index 000000000000..47ab78c4d465 --- /dev/null +++ b/tools/testing/selftests/net/lib/sh/defer.sh @@ -0,0 +1,131 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# Whether to pause and allow debugging when an executed deferred command has a +# non-zero exit code. +: "${DEFER_PAUSE_ON_FAIL:=no}" + +# map[(scope_id,track,cleanup_id) -> cleanup_command] +# track={d=default | p=priority} +declare -A __DEFER__JOBS + +# map[(scope_id,track) -> # cleanup_commands] +declare -A __DEFER__NJOBS + +# scope_id of the topmost scope. +__DEFER__SCOPE_ID=0 + +__defer__ndefer_key() +{ + local track=$1; shift + + echo $__DEFER__SCOPE_ID,$track +} + +__defer__defer_key() +{ + local track=$1; shift + local defer_ix=$1; shift + + echo $__DEFER__SCOPE_ID,$track,$defer_ix +} + +__defer__ndefers() +{ + local track=$1; shift + + echo ${__DEFER__NJOBS[$(__defer__ndefer_key $track)]} +} + +__defer__run() +{ + local track=$1; shift + local defer_ix=$1; shift + local defer_key=$(__defer__defer_key $track $defer_ix) + local ret + + eval ${__DEFER__JOBS[$defer_key]} + ret=$? + + if [[ "$DEFER_PAUSE_ON_FAIL" == yes && "$ret" -ne 0 ]]; then + echo "Deferred command (track $track index $defer_ix):" + echo " ${__DEFER__JOBS[$defer_key]}" + echo "... 
ended with an exit status of $ret" + echo "Hit enter to continue, 'q' to quit" + read a + [[ "$a" == q ]] && exit 1 + fi + + unset __DEFER__JOBS[$defer_key] +} + +__defer__schedule() +{ + local track=$1; shift + local ndefers=$(__defer__ndefers $track) + local ndefers_key=$(__defer__ndefer_key $track) + local defer_key=$(__defer__defer_key $track $ndefers) + local defer="${@@Q}" + + __DEFER__JOBS[$defer_key]="$defer" + __DEFER__NJOBS[$ndefers_key]=$((ndefers + 1)) +} + +__defer__scope_wipe() +{ + __DEFER__NJOBS[$(__defer__ndefer_key d)]=0 + __DEFER__NJOBS[$(__defer__ndefer_key p)]=0 +} + +defer_scope_push() +{ + ((__DEFER__SCOPE_ID++)) + __defer__scope_wipe +} + +defer_scope_pop() +{ + local defer_ix + + for ((defer_ix=$(__defer__ndefers p); defer_ix-->0; )); do + __defer__run p $defer_ix + done + + for ((defer_ix=$(__defer__ndefers d); defer_ix-->0; )); do + __defer__run d $defer_ix + done + + __defer__scope_wipe + ((__DEFER__SCOPE_ID--)) +} + +defer() +{ + __defer__schedule d "$@" +} + +defer_prio() +{ + __defer__schedule p "$@" +} + +defer_scopes_cleanup() +{ + while ((__DEFER__SCOPE_ID >= 0)); do + defer_scope_pop + done +} + +in_defer_scope() +{ + local ret + + defer_scope_push + "$@" + ret=$? 
+ defer_scope_pop + + return $ret +} + +__defer__scope_wipe diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c index d988b2e0cee8..e73fab3edd9f 100644 --- a/tools/testing/selftests/net/xdp_dummy.bpf.c +++ b/tools/testing/selftests/net/lib/xdp_dummy.bpf.c @@ -10,4 +10,10 @@ int xdp_dummy_prog(struct xdp_md *ctx) return XDP_PASS; } +SEC("xdp.frags") +int xdp_dummy_prog_frags(struct xdp_md *ctx) +{ + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c new file mode 100644 index 000000000000..eb025a9f35b1 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_helper.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#include "ksft.h" + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + + +static void print_usage(const char *bin) +{ + fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n" + "where:\n\t-z: force zerocopy mode", bin); +} + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. 
+ */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int retry = 0; + int ifindex; + int sock_fd; + int queue; + + if (argc != 3 && argc != 4) { + print_usage(argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + /* "Probing mode", just checking if AF_XDP sockets are supported */ + if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) { + printf("AF_XDP support detected\n"); + close(sock_fd); + return 0; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (argc > 3) { + if (!strcmp(argv[3], "-z")) { + sxdp.sxdp_flags = XDP_ZEROCOPY; + } else { + print_usage(argv[0]); + return 1; + } + } + + while (1) { + if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0) + break; + + if (errno == EBUSY && retry < 3) { + retry++; + sleep(1); + continue; + } else { + perror("bind failed"); + munmap(umem_area, UMEM_SZ); + close(sock_fd); + return 1; + } + } + + ksft_ready(); + 
ksft_wait(); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + + close(sock_fd); + return 0; +} diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c new file mode 100644 index 000000000000..64f05229ab24 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -0,0 +1,680 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define MAX_ADJST_OFFSET 256 +#define MAX_PAYLOAD_LEN 5000 +#define MAX_HDR_LEN 64 + +extern int bpf_xdp_pull_data(struct xdp_md *xdp, __u32 len) __ksym __weak; + +enum { + XDP_MODE = 0, + XDP_PORT = 1, + XDP_ADJST_OFFSET = 2, + XDP_ADJST_TAG = 3, +} xdp_map_setup_keys; + +enum { + XDP_MODE_PASS = 0, + XDP_MODE_DROP = 1, + XDP_MODE_TX = 2, + XDP_MODE_TAIL_ADJST = 3, + XDP_MODE_HEAD_ADJST = 4, +} xdp_map_modes; + +enum { + STATS_RX = 0, + STATS_PASS = 1, + STATS_DROP = 2, + STATS_TX = 3, + STATS_ABORT = 4, +} xdp_stats; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __u64); +} map_xdp_stats SEC(".maps"); + +static __u32 min(__u32 a, __u32 b) +{ + return a < b ? 
a : b; +} + +static void record_stats(struct xdp_md *ctx, __u32 stat_type) +{ + __u64 *count; + + count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); + + if (count) + __sync_fetch_and_add(count, 1); +} + +static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return NULL; + + udph = data + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return NULL; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return NULL; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + } else { + return NULL; + } + + if (udph + 1 > (struct udphdr *)data_end) + return NULL; + + if (udph->dest != bpf_htons(port)) + return NULL; + + record_stats(ctx, STATS_RX); + + return udph; +} + +static int xdp_mode_pass(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_PASS); + + return XDP_PASS; +} + +static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = 
filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_DROP); + + return XDP_DROP; +} + +static void swap_machdr(void *data) +{ + struct ethhdr *eth = data; + __u8 tmp_mac[ETH_ALEN]; + + __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN); + __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); + __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN); +} + +static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + void *data, *data_end; + struct ethhdr *eth; + int err; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = eth = (void *)(long)ctx->data; + + if (data + sizeof(*eth) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph; + __be32 tmp_ip; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*iph) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*iph) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + eth = data; + swap_machdr((void *)eth); + + tmp_ip = iph->saddr; + iph->saddr = iph->daddr; + iph->daddr = tmp_ip; + + record_stats(ctx, STATS_TX); + + return XDP_TX; + + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct in6_addr tmp_ipv6; + struct ipv6hdr *ipv6h; + + err = bpf_xdp_pull_data(ctx, sizeof(*eth) + sizeof(*ipv6h) + + sizeof(*udph)); + if (err) + return XDP_PASS; + + data_end = (void *)(long)ctx->data_end; + data = (void *)(long)ctx->data; + + ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return XDP_PASS; + + udph = data + 
sizeof(*ipv6h) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + eth = data; + swap_machdr((void *)eth); + + __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr, + sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6)); + + record_stats(ctx, STATS_TX); + + return XDP_TX; + } + + return XDP_PASS; +} + +static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + __u32 len, len_new; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __u16 total_len; + + if (iph + 1 > (struct iphdr *)data_end) + return NULL; + + iph->tot_len = bpf_htons(bpf_ntohs(iph->tot_len) + offset); + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + __u16 payload_len; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + *udp_csum = ~((__u32)udph->check); + + len = ipv6h->payload_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + ipv6h->payload_len = len_new; + + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + + len = udph->len; + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + } else { + return NULL; + } + + udph->len = len_new; + + return 
udph; +} + +static __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; +} + +static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, + unsigned long hdr_len) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + struct udphdr *udph = NULL; + __u32 buff_len; + + udph = update_pkt(ctx, 0 - offset, &udp_csum); + if (!udph) + return -1; + + buff_len = bpf_xdp_get_buff_len(ctx); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + /* Make sure we have enough data to avoid eating the header */ + if (buff_len - offset < hdr_len) + return -1; + + buff_pos = buff_len - offset; + if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + __u32 buff_len, hdr_len, key; + struct udphdr *udph; + __s32 *val; + __u8 tag; + + /* Proceed to update the packet headers before attempting to adjuste + * the tail. Once the tail is adjusted we lose access to the offset + * amount of data at the end of the packet which is crucial to update + * the checksum. + * Since any failure beyond this would abort the packet, we should + * not worry about passing a packet up the stack with wrong headers + */ + udph = update_pkt(ctx, offset, &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&tmp_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + buff_len = bpf_xdp_get_buff_len(ctx); + + if (bpf_xdp_adjust_tail(ctx, offset) < 0) { + bpf_printk("Failed to adjust tail\n"); + return -1; + } + + if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + __s32 *adjust_offset, *val; + unsigned long hdr_len; + void *offset_ptr; + __u32 key; + __u8 tag; + int ret; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + hdr_len = (void *)udph - (void *)(long)ctx->data + + sizeof(struct udphdr); + key = XDP_ADJST_OFFSET; + adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!adjust_offset) + return XDP_PASS; + + if (*adjust_offset < 0) + ret = xdp_adjst_tail_shrnk_data(ctx, + (__u16)(0 - *adjust_offset), + hdr_len); + else + ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset)); + if (ret) + goto abort_pkt; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort_pkt: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + struct udphdr *udph; + void *offset_ptr; + __u32 udp_csum = 0; + + /* Update the length information in the IP and UDP headers before + * adjusting the headroom. This simplifies accessing the relevant + * fields in the IP and UDP headers for fragmented packets. Any + * failure beyond this point will result in the packet being aborted, + * so we don't need to worry about incorrect length information for + * passed packets. + */ + udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum); + if (!udph) + return -1; + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) + return -1; + + if (bpf_xdp_adjust_head(ctx, offset) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char hdr_buff[MAX_HDR_LEN]; + char data_buff[MAX_ADJST_OFFSET]; + void *offset_ptr; + __s32 *val; + __u32 key; + __u8 tag; + __u32 udp_csum = 0; + struct udphdr *udph; + + udph = update_pkt(ctx, (__s16)(offset), &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&data_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph_ptr = NULL; + __u32 key, size, hdr_len; + __s32 *val; + int res; + + /* Filter packets based on UDP port */ + udph_ptr = filter_udphdr(ctx, port); + if (!udph_ptr) + return XDP_PASS; + + hdr_len = (void *)udph_ptr - (void *)(long)ctx->data + + sizeof(struct udphdr); + + key = XDP_ADJST_OFFSET; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return XDP_PASS; + + switch (*val) { + case -16: + case 16: + size = 16; + break; + case -32: + case 32: + size = 32; + break; + case -64: + case 64: + size = 64; + break; + case -128: + case 128: + size = 128; + break; + case -256: + case 256: + size = 256; + break; + default: + bpf_printk("Invalid adjustment offset: %d\n", *val); + goto abort; + } + + if (*val < 0) + res = xdp_adjst_head_grow_data(ctx, hdr_len, size); + else + res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size); + + if (res) + goto abort; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_prog_common(struct xdp_md *ctx) +{ + __u32 key, *port; + __s32 *mode; + + key = XDP_MODE; + mode = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!mode) + 
return XDP_PASS; + + key = XDP_PORT; + port = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!port) + return XDP_PASS; + + switch (*mode) { + case XDP_MODE_PASS: + return xdp_mode_pass(ctx, (__u16)(*port)); + case XDP_MODE_DROP: + return xdp_mode_drop_handler(ctx, (__u16)(*port)); + case XDP_MODE_TX: + return xdp_mode_tx_handler(ctx, (__u16)(*port)); + case XDP_MODE_TAIL_ADJST: + return xdp_adjst_tail(ctx, (__u16)(*port)); + case XDP_MODE_HEAD_ADJST: + return xdp_head_adjst(ctx, (__u16)(*port)); + } + + /* Default action is to simple pass */ + return XDP_PASS; +} + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +SEC("xdp.frags") +int xdp_prog_frags(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/net/link_netns.py b/tools/testing/selftests/net/link_netns.py new file mode 100755 index 000000000000..aab043c59d69 --- /dev/null +++ b/tools/testing/selftests/net/link_netns.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time + +from lib.py import ksft_run, ksft_exit, ksft_true +from lib.py import ip +from lib.py import NetNS, NetNSEnter +from lib.py import RtnlFamily + + +LINK_NETNSID = 100 + + +def test_event() -> None: + with NetNS() as ns1, NetNS() as ns2: + with NetNSEnter(str(ns2)): + rtnl = RtnlFamily() + + rtnl.ntf_subscribe("rtnlgrp-link") + + ip(f"netns set {ns2} {LINK_NETNSID}", ns=str(ns1)) + ip(f"link add netns {ns1} link-netnsid {LINK_NETNSID} dummy1 type dummy") + ip(f"link add netns {ns1} dummy2 type dummy", ns=str(ns2)) + + ip("link del dummy1", ns=str(ns1)) + ip("link del dummy2", ns=str(ns1)) + + time.sleep(1) + rtnl.check_ntf() + ksft_true(rtnl.async_msg_queue.empty(), + "Received unexpected link notification") + + +def validate_link_netns(netns, ifname, link_netnsid) -> bool: + link_info = ip(f"-d link show dev {ifname}", ns=netns, json=True) + if not link_info: + return False + 
return link_info[0].get("link_netnsid") == link_netnsid + + +def test_link_net() -> None: + configs = [ + # type, common args, type args, fallback to dev_net + ("ipvlan", "link dummy1", "", False), + ("macsec", "link dummy1", "", False), + ("macvlan", "link dummy1", "", False), + ("macvtap", "link dummy1", "", False), + ("vlan", "link dummy1", "id 100", False), + ("gre", "", "local 192.0.2.1", True), + ("vti", "", "local 192.0.2.1", True), + ("ipip", "", "local 192.0.2.1", True), + ("ip6gre", "", "local 2001:db8::1", True), + ("ip6tnl", "", "local 2001:db8::1", True), + ("vti6", "", "local 2001:db8::1", True), + ("sit", "", "local 192.0.2.1", True), + ("xfrm", "", "if_id 1", True), + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3: + net1, net2, net3 = str(ns1), str(ns2), str(ns3) + + # prepare link netnsid and a dummy link needed by certain drivers + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + ip("link add dummy1 type dummy", ns=net3) + + cases = [ + # source, "netns", "link-netns", expected link-netns + (net3, None, None, None, None), + (net3, net2, None, None, LINK_NETNSID), + (net2, None, net3, LINK_NETNSID, LINK_NETNSID), + (net1, net2, net3, LINK_NETNSID, LINK_NETNSID), + ] + + for src_net, netns, link_netns, exp1, exp2 in cases: + tgt_net = netns or src_net + for typ, cargs, targs, fb_dev_net in configs: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" {cargs} foo type {typ} {targs}" + ip(cmd, ns=src_net) + if fb_dev_net: + ksft_true(validate_link_netns(tgt_net, "foo", exp1), + f"{typ} link_netns validation failed") + else: + ksft_true(validate_link_netns(tgt_net, "foo", exp2), + f"{typ} link_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def test_peer_net() -> None: + types = [ + "vxcan", + "netkit", + "veth", + ] + + with NetNS() as ns1, NetNS() as ns2, NetNS() as ns3, NetNS() as ns4: + net1, net2, net3, net4 = str(ns1), str(ns2), str(ns3), 
str(ns4) + + ip(f"netns set {net3} {LINK_NETNSID}", ns=str(net2)) + + cases = [ + # source, "netns", "link-netns", "peer netns", expected + (net1, None, None, None, None), + (net1, net2, None, None, None), + (net2, None, net3, None, LINK_NETNSID), + (net1, net2, net3, None, None), + (net2, None, None, net3, LINK_NETNSID), + (net1, net2, None, net3, LINK_NETNSID), + (net2, None, net2, net3, LINK_NETNSID), + (net1, net2, net4, net3, LINK_NETNSID), + ] + + for src_net, netns, link_netns, peer_netns, exp in cases: + tgt_net = netns or src_net + for typ in types: + cmd = "link add" + if netns: + cmd += f" netns {netns}" + if link_netns: + cmd += f" link-netns {link_netns}" + cmd += f" foo type {typ}" + if peer_netns: + cmd += f" peer netns {peer_netns}" + ip(cmd, ns=src_net) + ksft_true(validate_link_netns(tgt_net, "foo", exp), + f"{typ} peer_netns validation failed") + ip(f"link del foo", ns=tgt_net) + + +def main() -> None: + ksft_run([test_event, test_link_net, test_peer_net]) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh new file mode 100755 index 000000000000..881eb399798f --- /dev/null +++ b/tools/testing/selftests/net/lwt_dst_cache_ref_loop.sh @@ -0,0 +1,246 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Author: Justin Iurman <justin.iurman@uliege.be> +# +# WARNING +# ------- +# This is just a dummy script that triggers encap cases with possible dst cache +# reference loops in affected lwt users (see list below). Some cases are +# pathological configurations for simplicity, others are valid. Overall, we +# don't want this issue to happen, no matter what. In order to catch any +# reference loops, kmemleak MUST be used. The results alone are always blindly +# successful, don't rely on them. 
Note that the following tests may crash the +# kernel if the fix to prevent lwtunnel_{input|output|xmit}() reentry loops is +# not present. +# +# Affected lwt users so far (please update accordingly if needed): +# - ila_lwt (output only) +# - ioam6_iptunnel (output only) +# - rpl_iptunnel (both input and output) +# - seg6_iptunnel (both input and output) + +source lib.sh + +check_compatibility() +{ + setup_ns tmp_node &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Cannot create netns." + exit $ksft_skip + fi + + ip link add name veth0 netns $tmp_node type veth \ + peer name veth1 netns $tmp_node &>/dev/null + local ret=$? + + ip -netns $tmp_node link set veth0 up &>/dev/null + ret=$((ret + $?)) + + ip -netns $tmp_node link set veth1 up &>/dev/null + ret=$((ret + $?)) + + if [ $ret != 0 ]; then + echo "SKIP: Cannot configure links." + cleanup_ns $tmp_node + exit $ksft_skip + fi + + lsmod 2>/dev/null | grep -q "ila" + ila_lsmod=$? + [ $ila_lsmod != 0 ] && modprobe ila &>/dev/null + + ip -netns $tmp_node route add 2001:db8:1::/64 \ + encap ila 1:2:3:4 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 0 size 4 \ + dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:3::/64 \ + encap rpl segs 2001:db8:3::1 dev veth0 &>/dev/null + + ip -netns $tmp_node route add 2001:db8:4::/64 \ + encap seg6 mode inline segs 2001:db8:4::1 dev veth0 &>/dev/null + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ila" + skip_ila=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap ioam6" + skip_ioam6=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap rpl" + skip_rpl=$? + + ip -netns $tmp_node -6 route 2>/dev/null | grep -q "encap seg6" + skip_seg6=$? 
+ + cleanup_ns $tmp_node +} + +setup() +{ + setup_ns alpha beta gamma &>/dev/null + + ip link add name veth-alpha netns $alpha type veth \ + peer name veth-betaL netns $beta &>/dev/null + + ip link add name veth-betaR netns $beta type veth \ + peer name veth-gamma netns $gamma &>/dev/null + + ip -netns $alpha link set veth-alpha name veth0 &>/dev/null + ip -netns $beta link set veth-betaL name veth0 &>/dev/null + ip -netns $beta link set veth-betaR name veth1 &>/dev/null + ip -netns $gamma link set veth-gamma name veth0 &>/dev/null + + ip -netns $alpha addr add 2001:db8:1::2/64 dev veth0 &>/dev/null + ip -netns $alpha link set veth0 up &>/dev/null + ip -netns $alpha link set lo up &>/dev/null + ip -netns $alpha route add 2001:db8:2::/64 \ + via 2001:db8:1::1 dev veth0 &>/dev/null + + ip -netns $beta addr add 2001:db8:1::1/64 dev veth0 &>/dev/null + ip -netns $beta addr add 2001:db8:2::1/64 dev veth1 &>/dev/null + ip -netns $beta link set veth0 up &>/dev/null + ip -netns $beta link set veth1 up &>/dev/null + ip -netns $beta link set lo up &>/dev/null + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + ip netns exec $beta \ + sysctl -wq net.ipv6.conf.all.forwarding=1 &>/dev/null + + ip -netns $gamma addr add 2001:db8:2::2/64 dev veth0 &>/dev/null + ip -netns $gamma link set veth0 up &>/dev/null + ip -netns $gamma link set lo up &>/dev/null + ip -netns $gamma route add 2001:db8:1::/64 \ + via 2001:db8:2::1 dev veth0 &>/dev/null + + sleep 1 + + ip netns exec $alpha ping6 -c 5 -W 1 2001:db8:2::2 &>/dev/null + if [ $? != 0 ]; then + echo "SKIP: Setup failed." 
+ exit $ksft_skip + fi + + sleep 1 +} + +cleanup() +{ + cleanup_ns $alpha $beta $gamma + [ $ila_lsmod != 0 ] && modprobe -r ila &>/dev/null +} + +run_ila() +{ + if [ $skip_ila != 0 ]; then + echo "SKIP: ila (output)" + return + fi + + ip -netns $beta route del 2001:db8:2::/64 + ip -netns $beta route add 2001:db8:2:0:0:0:0:2/128 \ + encap ila 2001:db8:2:0 csum-mode no-action ident-type luid \ + hook-type output \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ila (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + ip -netns $beta route del 2001:db8:2:0:0:0:0:2/128 + ip -netns $beta route add 2001:db8:2::/64 dev veth1 + sleep 1 +} + +run_ioam6() +{ + if [ $skip_ioam6 != 0 ]; then + echo "SKIP: ioam6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap ioam6 trace prealloc type 0x800000 ns 1 size 4 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: ioam6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_rpl() +{ + if [ $skip_rpl != 0 ]; then + echo "SKIP: rpl (input)" + echo "SKIP: rpl (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap rpl segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: rpl (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: rpl (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run_seg6() +{ + if [ $skip_seg6 != 0 ]; then + echo "SKIP: seg6 (input)" + echo "SKIP: seg6 (output)" + return + fi + + ip -netns $beta route change 2001:db8:2::/64 \ + encap seg6 mode inline segs 2001:db8:2::2 \ + dev veth1 &>/dev/null + sleep 1 + + echo "TEST: seg6 (input)" + ip netns exec $alpha ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 + + echo "TEST: seg6 (output)" + ip netns exec $beta ping6 -c 2 -W 1 2001:db8:2::2 &>/dev/null + sleep 1 +} + +run() +{ + run_ila + run_ioam6 + run_rpl + run_seg6 +} + +if [ "$(id 
-u)" -ne 0 ]; then + echo "SKIP: Need root privileges." + exit $ksft_skip +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool." + exit $ksft_skip +fi + +check_compatibility + +trap cleanup EXIT + +setup +run + +exit $ksft_pass diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index 49daae73c41e..833279fb34e2 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-only mptcp_connect +mptcp_diag mptcp_inq mptcp_sockopt pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index 7b936a926859..15d144a25d82 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -2,14 +2,35 @@ top_srcdir = ../../../../.. -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ - simult_flows.sh mptcp_sockopt.sh userspace_pm.sh +TEST_PROGS := \ + diag.sh \ + mptcp_connect.sh \ + mptcp_connect_checksum.sh \ + mptcp_connect_mmap.sh \ + mptcp_connect_sendfile.sh \ + mptcp_join.sh \ + mptcp_sockopt.sh \ + pm_netlink.sh \ + simult_flows.sh \ + userspace_pm.sh \ +# end of TEST_PROGS -TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq +TEST_GEN_FILES := \ + mptcp_connect \ + mptcp_diag \ + mptcp_inq \ + mptcp_sockopt \ + pm_nl_ctl \ +# end of TEST_GEN_FILES -TEST_FILES := mptcp_lib.sh settings +TEST_FILES := \ + mptcp_lib.sh \ + settings \ +# end of TEST_FILES + +TEST_INCLUDES := ../lib.sh $(wildcard ../lib/sh/*.sh) EXTRA_CLEAN := *.pcap diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 4f80014cae49..59051ee2a986 100644 --- 
a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,34 +1,36 @@ -CONFIG_KALLSYMS=y -CONFIG_MPTCP=y -CONFIG_IPV6=y -CONFIG_MPTCP_IPV6=y CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m -CONFIG_VETH=y -CONFIG_NET_SCH_NETEM=m -CONFIG_SYN_COOKIES=y -CONFIG_NETFILTER=y -CONFIG_NETFILTER_ADVANCED=y -CONFIG_NETFILTER_NETLINK=m -CONFIG_NF_TABLES=m -CONFIG_NFT_COMPAT=m -CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_MATCH_BPF=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NF_TABLES_INET=y -CONFIG_NFT_TPROXY=m -CONFIG_NFT_SOCKET=m +CONFIG_IP6_NF_FILTER=m +CONFIG_IP6_NF_TARGET_REJECT=m CONFIG_IP_ADVANCED_ROUTER=y CONFIG_IP_MULTIPLE_TABLES=y CONFIG_IP_NF_FILTER=m CONFIG_IP_NF_MANGLE=m CONFIG_IP_NF_TARGET_REJECT=m +CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IP6_NF_FILTER=m +CONFIG_KALLSYMS=y +CONFIG_MPTCP=y +CONFIG_MPTCP_IPV6=y CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_PEDIT=m CONFIG_NET_CLS_ACT=y CONFIG_NET_CLS_FW=m +CONFIG_NETFILTER=y +CONFIG_NETFILTER_ADVANCED=y +CONFIG_NETFILTER_NETLINK=m +CONFIG_NETFILTER_XTABLES=m +CONFIG_NETFILTER_XTABLES_LEGACY=y +CONFIG_NETFILTER_XT_MATCH_BPF=m +CONFIG_NETFILTER_XT_MATCH_LENGTH=m +CONFIG_NETFILTER_XT_MATCH_STATISTIC=m +CONFIG_NETFILTER_XT_TARGET_MARK=m CONFIG_NET_SCH_INGRESS=m +CONFIG_NET_SCH_NETEM=m +CONFIG_NF_TABLES=m +CONFIG_NF_TABLES_INET=y +CONFIG_NFT_COMPAT=m +CONFIG_NFT_SOCKET=m +CONFIG_NFT_TPROXY=m +CONFIG_SYN_COOKIES=y +CONFIG_VETH=y diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh index 776d43a6922d..d847ff1737c3 100755 --- a/tools/testing/selftests/net/mptcp/diag.sh +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -28,7 +28,7 @@ flush_pids() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null @@ -200,6 +200,62 @@ 
chk_msk_cestab() "${expected}" "${msg}" "" } +chk_dump_one() +{ + local ss_token + local token + local msg + + ss_token="$(ss -inmHMN $ns | + mptcp_lib_get_info_value "token" "token")" + + token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\ + awk -F':[ \t]+' '/^token/ {print $2}')" + + msg="....chk dump_one" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + +chk_dump_subflow() +{ + local inet_diag_token + local subflow_line + local ss_output + local ss_token + local msg + + ss_output=$(ss -tniN $ns) + + subflow_line=$(echo "$ss_output" | \ + grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+') + + ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+') + + inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \ + grep -Eo 'token:[^ ]+') + + msg="....chk dump_subflow" + + mptcp_lib_print_title "$msg" + if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then + mptcp_lib_pr_ok + mptcp_lib_result_pass "${msg}" + else + mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token" + mptcp_lib_result_fail "${msg}" + ret=${KSFT_FAIL} + fi +} + msk_info_get_value() { local port="${1}" @@ -284,12 +340,14 @@ echo "b" | \ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \ 127.0.0.1 >/dev/null & wait_connected $ns 10000 -chk_msk_nr 2 "after MPC handshake " +chk_msk_nr 2 "after MPC handshake" chk_last_time_info 10000 chk_msk_remote_key_nr 2 "....chk remote_key" chk_msk_fallback_nr 0 "....chk no fallback" chk_msk_inuse 2 chk_msk_cestab 2 +chk_dump_one +chk_dump_subflow flush_pids chk_msk_inuse 0 "2->0" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index d2043ec3bf6d..404a77bf366a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ 
b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -25,6 +25,8 @@ #include <sys/types.h> #include <sys/mman.h> +#include <arpa/inet.h> + #include <netdb.h> #include <netinet/in.h> @@ -178,13 +180,27 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen, } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + /* glibc starts to support MPTCP since v2.42. + * For older versions, use IPPROTO_TCP to resolve, + * and use TCP/MPTCP to create socket. + * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82 + */ + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? 
service : "", errstr); @@ -290,7 +306,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -354,7 +370,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, int infd, struct wstate *winfo) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -694,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { - if (cfg_rcv_trunc) - return 0; + /* expected reset, continue to read */ + if (cfg_rcv_trunc && + (errno == ECONNRESET || + errno == EPIPE)) { + fds.events &= ~POLLOUT; + continue; + } + perror("write"); return 111; } @@ -721,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } if (fds.revents & (POLLERR | POLLNVAL)) { - if (cfg_rcv_trunc) - return 0; + if (cfg_rcv_trunc) { + fds.events &= ~(POLLERR | POLLNVAL); + continue; + } fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; @@ -1048,6 +1072,8 @@ static void check_getpeername_connect(int fd) socklen_t salen = sizeof(ss); char a[INET6_ADDRSTRLEN]; char b[INET6_ADDRSTRLEN]; + const char *iface; + size_t len; if (getpeername(fd, (struct sockaddr *)&ss, &salen) < 0) { perror("getpeername"); @@ -1057,7 +1083,13 @@ static void check_getpeername_connect(int fd) xgetnameinfo((struct sockaddr *)&ss, salen, a, sizeof(a), b, sizeof(b)); - if (strcmp(cfg_host, a) || strcmp(cfg_port, b)) + iface = strchr(cfg_host, '%'); + if (iface) + len = iface - cfg_host; + else + len = strlen(cfg_host) + 1; + + if (strncmp(cfg_host, a, len) || strcmp(cfg_port, b)) fprintf(stderr, "%s: %s vs %s, %s vs %s\n", __func__, cfg_host, a, cfg_port, b); } @@ -1077,6 +1109,7 @@ int main_loop_s(int 
listensock) struct pollfd polls; socklen_t salen; int remotesock; + int err = 0; int fd = 0; again: @@ -1109,19 +1142,19 @@ again: SOCK_TEST_TCPULP(remotesock, 0); memset(&winfo, 0, sizeof(winfo)); - copyfd_io(fd, remotesock, 1, true, &winfo); + err = copyfd_io(fd, remotesock, 1, true, &winfo); } else { perror("accept"); return 1; } - if (--cfg_repeat > 0) { - if (cfg_input) - close(fd); + if (cfg_input) + close(fd); + + if (!err && --cfg_repeat > 0) goto again; - } - return 0; + return err; } static void init_rng(void) @@ -1211,23 +1244,42 @@ static void parse_setsock_options(const char *name) exit(1); } -void xdisconnect(int fd, int addrlen) +void xdisconnect(int fd) { - struct sockaddr_storage empty; + socklen_t addrlen = sizeof(struct sockaddr_storage); + struct sockaddr_storage addr, empty; int msec_sleep = 10; - int queued = 1; - int i; + void *raw_addr; + int i, cmdlen; + char cmd[128]; + + /* get the local address and convert it to string */ + if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0) + xerror("getsockname"); + + if (addr.ss_family == AF_INET) + raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr); + else if (addr.ss_family == AF_INET6) + raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr); + else + xerror("bad family"); + + strcpy(cmd, "ss -Mnt | grep -q "); + cmdlen = strlen(cmd); + if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen], + sizeof(cmd) - cmdlen)) + xerror("inet_ntop"); shutdown(fd, SHUT_WR); - /* while until the pending data is completely flushed, the later + /* + * wait until the pending data is completely flushed and all + * the sockets reached the closed status. * disconnect will bypass/ignore/drop any pending data. 
*/ for (i = 0; ; i += msec_sleep) { - if (ioctl(fd, SIOCOUTQ, &queued) < 0) - xerror("can't query out socket queue: %d", errno); - - if (!queued) + /* closed socket are not listed by 'ss' */ + if (system(cmd) != 0) break; if (i > poll_timeout) @@ -1249,7 +1301,7 @@ int main_loop(void) if (cfg_input && cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } @@ -1272,18 +1324,18 @@ again: if (cfg_input && !cfg_sockopt_types.mptfo) { fd_in = open(cfg_input, O_RDONLY); - if (fd < 0) + if (fd_in < 0) xerror("can't open %s:%d", cfg_input, errno); } ret = copyfd_io(fd_in, fd, 1, 0, &winfo); if (ret) - return ret; + goto out; if (cfg_truncate > 0) { - xdisconnect(fd, peer->ai_addrlen); + shutdown(fd, SHUT_WR); } else if (--cfg_repeat > 0) { - xdisconnect(fd, peer->ai_addrlen); + xdisconnect(fd); /* the socket could be unblocking at this point, we need the * connect to be blocking @@ -1299,7 +1351,10 @@ again: close(fd); } - return 0; +out: + if (cfg_input) + close(fd_in); + return ret; } int parse_proto(const char *proto) @@ -1394,7 +1449,7 @@ static void parse_opts(int argc, char **argv) */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; - signal(SIGPIPE, handle_signal); + signal(SIGPIPE, SIG_IGN); } break; case 'j': diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index b77fb7065bfb..a6447f7a31fe 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -134,10 +134,10 @@ ns4="" TEST_GROUP="" # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { - rm -f "$cin_disconnect" "$cout_disconnect" + rm -f "$cin_disconnect" rm -f "$cin" "$cout" rm -f "$sin" "$sout" rm -f "$capout" @@ -155,7 +155,6 @@ cin=$(mktemp) cout=$(mktemp) capout=$(mktemp) cin_disconnect="$cin".disconnect 
-cout_disconnect="$cout".disconnect trap cleanup EXIT mptcp_lib_ns_init ns1 ns2 ns3 ns4 @@ -212,6 +211,11 @@ if $checksum; then done fi +if $capture; then + rndh="${ns1:4}" + mptcp_lib_pr_info "Packet capture files will have this prefix: ${rndh}-" +fi + set_ethtool_flags() { local ns="$1" local dev="$2" @@ -259,6 +263,15 @@ check_mptcp_disabled() mptcp_lib_ns_init disabled_ns print_larger_title "New MPTCP socket can be blocked via sysctl" + + # mainly to cover more code + if ! ip netns exec ${disabled_ns} sysctl net.mptcp >/dev/null; then + mptcp_lib_pr_fail "not able to list net.mptcp sysctl knobs" + mptcp_lib_result_fail "not able to list net.mptcp sysctl knobs" + ret=${KSFT_FAIL} + return 1 + fi + # net.mptcp.enabled should be enabled by default if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default" @@ -345,13 +358,14 @@ do_transfer() local addr_port addr_port=$(printf "%s:%d" ${connect_addr} ${port}) - local result_msg - result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" - mptcp_lib_print_title "${result_msg}" + local pretty_title + pretty_title="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})" + mptcp_lib_print_title "${pretty_title}" + + local tap_title="${connector_ns:0:3} ${cl_proto} -> ${listener_ns:0:3} (${addr_port}) ${srv_proto}" if $capture; then local capuser - local rndh="${connector_ns:4}" if [ -z $SUDO_USER ] ; then capuser="" else @@ -361,90 +375,79 @@ do_transfer() local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" local capopt="-i any -s 65535 -B 32768 ${capuser}" - ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + ip netns exec ${listener_ns} tcpdump ${capopt} \ + -w 
"${capfile}-listener.pcap" >> "${capout}" 2>&1 & local cappid_listener=$! - ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & - local cappid_connector=$! + if [ ${listener_ns} != ${connector_ns} ]; then + ip netns exec ${connector_ns} tcpdump ${capopt} \ + -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + fi sleep 1 fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n + mptcp_lib_nstat_init "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n - fi - - local stat_synrx_last_l - local stat_ackrx_last_l - local stat_cookietx_last - local stat_cookierx_last - local stat_csum_err_s - local stat_csum_err_c - local stat_tcpfb_last_l - stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") - stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_args $local_addr < "$sin" > "$sout" & + mptcp_lib_nstat_init "${connector_ns}" + fi + + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ + $extra_args $local_addr < "$sin" > "$sout" & local spid=$! 
mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" local start start=$(date +%s%3N) - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_args $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + local stop stop=$(date +%s%3N) if $capture; then sleep 1 kill ${cappid_listener} - kill ${cappid_connector} + if [ ${listener_ns} != ${connector_ns} ]; then + kill ${cappid_connector} + fi fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out + mptcp_lib_nstat_get "${listener_ns}" if [ ${listener_ns} != ${connector_ns} ]; then - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${connector_ns}" fi local duration duration=$((stop-start)) - result_msg+=" # time=${duration}ms" printf "(duration %05sms) " "${duration}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - [ ${listener_ns} != ${connector_ns} ] && cat 
/tmp/${connector_ns}.out + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" echo cat "$capout" - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" return 1 fi @@ -454,38 +457,38 @@ do_transfer() rets=$? local extra="" - local stat_synrx_now_l - local stat_ackrx_now_l - local stat_cookietx_now - local stat_cookierx_now - local stat_ooo_now - local stat_tcpfb_now_l - stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") - stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") - stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") - stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") - stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") - stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") - - expect_synrx=$((stat_synrx_last_l)) - expect_ackrx=$((stat_ackrx_last_l)) + local stat_synrx + local stat_ackrx + local stat_cookietx + local stat_cookierx + local stat_ooo + local stat_tcpfb + stat_synrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX") + stat_ackrx=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX") + stat_cookietx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent") + stat_cookierx=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv") + stat_ooo=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue") + stat_tcpfb=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK") + + expect_synrx=0 + expect_ackrx=0 cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies) cookies=${cookies##*=} if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then - expect_synrx=$((stat_synrx_last_l+connect_per_transfer)) - expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer)) + expect_synrx=${connect_per_transfer} + 
expect_ackrx=${connect_per_transfer} fi - if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then - mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \ + if [ ${stat_synrx} -lt ${expect_synrx} ]; then + mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx})" \ "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then - if [ ${stat_ooo_now} -eq 0 ]; then - mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ + if [ ${stat_ackrx} -lt ${expect_ackrx} ]; then + if [ ${stat_ooo} -eq 0 ]; then + mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx})" \ "than expected (${expect_ackrx})" rets=1 else @@ -499,57 +502,55 @@ do_transfer() csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr") csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr") - local csum_err_s_nr=$((csum_err_s - stat_csum_err_s)) - if [ $csum_err_s_nr -gt 0 ]; then - mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]" + if [ $csum_err_s -gt 0 ]; then + mptcp_lib_pr_fail "server got ${csum_err_s} data checksum error[s]" rets=1 fi - local csum_err_c_nr=$((csum_err_c - stat_csum_err_c)) - if [ $csum_err_c_nr -gt 0 ]; then - mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]" + if [ $csum_err_c -gt 0 ]; then + mptcp_lib_pr_fail "client got ${csum_err_c} data checksum error[s]" retc=1 fi fi - if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then + if [ ${stat_ooo} -eq 0 ] && [ ${stat_tcpfb} -gt 0 ]; then mptcp_lib_pr_fail "unexpected fallback to TCP" rets=1 fi if [ $cookies -eq 2 ];then - if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then + if [ $stat_cookietx -eq 0 ] ;then extra+=" WARN: CookieSent: did not advance" fi - if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then + if [ $stat_cookierx -eq 0 ] ;then extra+=" WARN: CookieRecv: did not advance" fi else - if [ $stat_cookietx_last -ne $stat_cookietx_now ] 
;then + if [ $stat_cookietx -gt 0 ] ;then extra+=" WARN: CookieSent: changed" fi - if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then + if [ $stat_cookierx -gt 0 ] ;then extra+=" WARN: CookieRecv: changed" fi fi - if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then + if [ ${stat_synrx} -gt ${expect_synrx} ]; then extra+=" WARN: SYNRX: expect ${expect_synrx}," - extra+=" got ${stat_synrx_now_l} (probably retransmissions)" + extra+=" got ${stat_synrx} (probably retransmissions)" fi - if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then + if [ ${stat_ackrx} -gt ${expect_ackrx} ]; then extra+=" WARN: ACKRX: expect ${expect_ackrx}," - extra+=" got ${stat_ackrx_now_l} (probably retransmissions)" + extra+=" got ${stat_ackrx} (probably retransmissions)" fi if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then mptcp_lib_pr_ok "${extra:1}" - mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_pass "${TEST_GROUP}: ${tap_title}" else if [ -n "${extra}" ]; then mptcp_lib_print_warn "${extra:1}" fi - mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}" + mptcp_lib_result_fail "${TEST_GROUP}: ${tap_title}" fi cat "$capout" @@ -577,7 +578,7 @@ make_file() mptcp_lib_make_file $name 1024 $ksize dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null - echo "Created $name (size $(du -b "$name")) containing data sent by $who" + echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who" } run_tests_lo() @@ -848,6 +849,8 @@ stop_if_error() make_file "$cin" "client" make_file "$sin" "server" +mptcp_lib_subtests_last_ts_reset + check_mptcp_disabled stop_if_error "The kernel configuration is not valid for MPTCP" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh new file mode 100755 index 000000000000..ce93ec2f107f --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_checksum.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# 
SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -C "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh new file mode 100755 index 000000000000..5dd30f9394af --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_mmap.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m mmap "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh new file mode 100755 index 000000000000..1d16fb1cc9bb --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_connect_sendfile.sh @@ -0,0 +1,5 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +MPTCP_LIB_KSFT_TEST="$(basename "${0}" .sh)" \ + "$(dirname "${0}")/mptcp_connect.sh" -m sendfile "${@}" diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c new file mode 100644 index 000000000000..e084796e804d --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c @@ -0,0 +1,435 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025, Kylin Software */ + +#include <linux/sock_diag.h> +#include <linux/rtnetlink.h> +#include <linux/inet_diag.h> +#include <linux/netlink.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <linux/tcp.h> +#include <arpa/inet.h> + +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> + +#ifndef IPPROTO_MPTCP +#define IPPROTO_MPTCP 262 +#endif + +#define parse_rtattr_nested(tb, max, rta) \ + (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \ + NLA_F_NESTED)) + +struct params { + __u32 target_token; + char subflow_addrs[1024]; +}; + +struct mptcp_info { + __u8 mptcpi_subflows; + __u8 mptcpi_add_addr_signal; + 
__u8 mptcpi_add_addr_accepted; + __u8 mptcpi_subflows_max; + __u8 mptcpi_add_addr_signal_max; + __u8 mptcpi_add_addr_accepted_max; + __u32 mptcpi_flags; + __u32 mptcpi_token; + __u64 mptcpi_write_seq; + __u64 mptcpi_snd_una; + __u64 mptcpi_rcv_nxt; + __u8 mptcpi_local_addr_used; + __u8 mptcpi_local_addr_max; + __u8 mptcpi_csum_enabled; + __u32 mptcpi_retransmits; + __u64 mptcpi_bytes_retrans; + __u64 mptcpi_bytes_sent; + __u64 mptcpi_bytes_received; + __u64 mptcpi_bytes_acked; + __u8 mptcpi_subflows_total; + __u8 reserved[3]; + __u32 mptcpi_last_data_sent; + __u32 mptcpi_last_data_recv; + __u32 mptcpi_last_ack_recv; +}; + +enum { + MPTCP_SUBFLOW_ATTR_UNSPEC, + MPTCP_SUBFLOW_ATTR_TOKEN_REM, + MPTCP_SUBFLOW_ATTR_TOKEN_LOC, + MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SEQ, + MPTCP_SUBFLOW_ATTR_MAP_SFSEQ, + MPTCP_SUBFLOW_ATTR_SSN_OFFSET, + MPTCP_SUBFLOW_ATTR_MAP_DATALEN, + MPTCP_SUBFLOW_ATTR_FLAGS, + MPTCP_SUBFLOW_ATTR_ID_REM, + MPTCP_SUBFLOW_ATTR_ID_LOC, + MPTCP_SUBFLOW_ATTR_PAD, + + __MPTCP_SUBFLOW_ATTR_MAX +}; + +#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1) + +#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0) +#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1) +#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2) +#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3) +#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4) +#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5) +#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6) +#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7) +#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8) + +#define rta_getattr(type, value) (*(type *)RTA_DATA(value)) + +static void die_perror(const char *msg) +{ + perror(msg); + exit(1); +} + +static void die_usage(int r) +{ + fprintf(stderr, "Usage:\n" + "mptcp_diag -t <token>\n" + "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n"); + exit(r); +} + +static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto) +{ + struct sockaddr_nl nladdr = { + .nl_family = AF_NETLINK + }; + struct { + 
struct nlmsghdr nlh; + struct inet_diag_req_v2 r; + } req = { + .nlh = { + .nlmsg_len = sizeof(req), + .nlmsg_type = SOCK_DIAG_BY_FAMILY, + .nlmsg_flags = NLM_F_REQUEST + }, + .r = *r + }; + struct rtattr rta_proto; + struct iovec iov[6]; + int iovlen = 0; + + iov[iovlen++] = (struct iovec) { + .iov_base = &req, + .iov_len = sizeof(req) + }; + + if (proto == IPPROTO_MPTCP) { + rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL; + rta_proto.rta_len = RTA_LENGTH(sizeof(proto)); + + iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)}; + iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)}; + req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto)); + } + + struct msghdr msg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = iov, + .msg_iovlen = iovlen + }; + + for (;;) { + if (sendmsg(fd, &msg, 0) < 0) { + if (errno == EINTR) + continue; + die_perror("sendmsg"); + } + break; + } +} + +static void parse_rtattr_flags(struct rtattr *tb[], int max, struct rtattr *rta, + int len, unsigned short flags) +{ + unsigned short type; + + memset(tb, 0, sizeof(struct rtattr *) * (max + 1)); + while (RTA_OK(rta, len)) { + type = rta->rta_type & ~flags; + if (type <= max && !tb[type]) + tb[type] = rta; + rta = RTA_NEXT(rta, len); + } +} + +static void print_info_msg(struct mptcp_info *info) +{ + printf("Token & Flags\n"); + printf("token: %x\n", info->mptcpi_token); + printf("flags: %x\n", info->mptcpi_flags); + printf("csum_enabled: %u\n", info->mptcpi_csum_enabled); + + printf("\nBasic Info\n"); + printf("subflows: %u\n", info->mptcpi_subflows); + printf("subflows_max: %u\n", info->mptcpi_subflows_max); + printf("subflows_total: %u\n", info->mptcpi_subflows_total); + printf("local_addr_used: %u\n", info->mptcpi_local_addr_used); + printf("local_addr_max: %u\n", info->mptcpi_local_addr_max); + printf("add_addr_signal: %u\n", info->mptcpi_add_addr_signal); + printf("add_addr_accepted: %u\n", info->mptcpi_add_addr_accepted); + printf("add_addr_signal_max: %u\n", 
info->mptcpi_add_addr_signal_max); + printf("add_addr_accepted_max: %u\n", info->mptcpi_add_addr_accepted_max); + + printf("\nTransmission Info\n"); + printf("write_seq: %llu\n", info->mptcpi_write_seq); + printf("snd_una: %llu\n", info->mptcpi_snd_una); + printf("rcv_nxt: %llu\n", info->mptcpi_rcv_nxt); + printf("last_data_sent: %u\n", info->mptcpi_last_data_sent); + printf("last_data_recv: %u\n", info->mptcpi_last_data_recv); + printf("last_ack_recv: %u\n", info->mptcpi_last_ack_recv); + printf("retransmits: %u\n", info->mptcpi_retransmits); + printf("retransmit bytes: %llu\n", info->mptcpi_bytes_retrans); + printf("bytes_sent: %llu\n", info->mptcpi_bytes_sent); + printf("bytes_received: %llu\n", info->mptcpi_bytes_received); + printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked); +} + +/* + * 'print_subflow_info' is from 'mptcp_subflow_info' + * which is a function in 'misc/ss.c' of iproute2. + */ +static void print_subflow_info(struct rtattr *tb[]) +{ + u_int32_t flags = 0; + + printf("It's a mptcp subflow, the subflow info:\n"); + if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) { + char caps[32 + 1] = { 0 }, *cap = &caps[0]; + + flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]); + + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM) + *cap++ = 'M'; + if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC) + *cap++ = 'm'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM) + *cap++ = 'J'; + if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC) + *cap++ = 'j'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM) + *cap++ = 'B'; + if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC) + *cap++ = 'b'; + if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED) + *cap++ = 'e'; + if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED) + *cap++ = 'c'; + if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID) + *cap++ = 'v'; + + if (flags) + printf(" flags:%s", caps); + } + if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] && + tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] && + tb[MPTCP_SUBFLOW_ATTR_ID_REM] && + tb[MPTCP_SUBFLOW_ATTR_ID_LOC]) + printf(" token:%04x(id:%u)/%04x(id:%u)", + 
rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]), + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]), + rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]) + printf(" seq:%llu", + rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]) + printf(" sfseq:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])); + if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]) + printf(" ssnoff:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])); + if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]) + printf(" maplen:%u", + rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])); + printf("\n"); +} + +static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto) +{ + struct inet_diag_msg *r = NLMSG_DATA(nlh); + struct rtattr *tb[INET_DIAG_MAX + 1]; + + parse_rtattr_flags(tb, INET_DIAG_MAX, (struct rtattr *)(r + 1), + nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)), + NLA_F_NESTED); + + if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) { + int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]); + struct mptcp_info *info; + + /* workaround fort older kernels with less fields */ + if (len < sizeof(*info)) { + info = alloca(sizeof(*info)); + memcpy(info, RTA_DATA(tb[INET_DIAG_INFO]), len); + memset((char *)info + len, 0, sizeof(*info) - len); + } else { + info = RTA_DATA(tb[INET_DIAG_INFO]); + } + print_info_msg(info); + } + if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) { + struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 }; + + parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX, + tb[INET_DIAG_ULP_INFO]); + + if (ulpinfo[INET_ULP_INFO_MPTCP]) { + struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 }; + + parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX, + ulpinfo[INET_ULP_INFO_MPTCP]); + print_subflow_info(sfinfo); + } else { + printf("It's a normal TCP!\n"); + } + } +} + +static void recv_nlmsg(int fd, __u32 proto) +{ + char rcv_buff[8192]; + struct nlmsghdr *nlh = 
(struct nlmsghdr *)rcv_buff; + struct sockaddr_nl rcv_nladdr = { + .nl_family = AF_NETLINK + }; + struct iovec rcv_iov = { + .iov_base = rcv_buff, + .iov_len = sizeof(rcv_buff) + }; + struct msghdr rcv_msg = { + .msg_name = &rcv_nladdr, + .msg_namelen = sizeof(rcv_nladdr), + .msg_iov = &rcv_iov, + .msg_iovlen = 1 + }; + int len; + + len = recvmsg(fd, &rcv_msg, 0); + + while (NLMSG_OK(nlh, len)) { + if (nlh->nlmsg_type == NLMSG_DONE) { + printf("NLMSG_DONE\n"); + break; + } else if (nlh->nlmsg_type == NLMSG_ERROR) { + struct nlmsgerr *err; + + err = (struct nlmsgerr *)NLMSG_DATA(nlh); + printf("Error %d:%s\n", + -(err->error), strerror(-(err->error))); + break; + } + parse_nlmsg(nlh, proto); + nlh = NLMSG_NEXT(nlh, len); + } +} + +static void get_mptcpinfo(__u32 token) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + /* Real proto is set via INET_DIAG_REQ_PROTOCOL */ + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = token, + }; + __u32 proto = IPPROTO_MPTCP; + int fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + send_query(fd, &r, proto); + recv_nlmsg(fd, proto); + + close(fd); +} + +static void get_subflow_info(char *subflow_addrs) +{ + struct inet_diag_req_v2 r = { + .sdiag_family = AF_INET, + .sdiag_protocol = IPPROTO_TCP, + .idiag_ext = 1 << (INET_DIAG_INFO - 1), + .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE, + .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE, + }; + char saddr[64], daddr[64]; + int sport, dport; + int ret; + int fd; + + ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport); + if (ret != 4) + die_perror("IP PORT Pairs has style problems!"); + + printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG); + if (fd < 0) + die_perror("Netlink socket"); + + r.id.idiag_sport = htons(sport); + r.id.idiag_dport = htons(dport); + + inet_pton(AF_INET, saddr, 
&r.id.idiag_src); + inet_pton(AF_INET, daddr, &r.id.idiag_dst); + send_query(fd, &r, IPPROTO_TCP); + recv_nlmsg(fd, IPPROTO_TCP); +} + +static void parse_opts(int argc, char **argv, struct params *p) +{ + int c; + + if (argc < 2) + die_usage(1); + + while ((c = getopt(argc, argv, "ht:s:")) != -1) { + switch (c) { + case 'h': + die_usage(0); + break; + case 't': + sscanf(optarg, "%x", &p->target_token); + break; + case 's': + strncpy(p->subflow_addrs, optarg, + sizeof(p->subflow_addrs) - 1); + break; + default: + die_usage(1); + break; + } + } +} + +int main(int argc, char *argv[]) +{ + struct params p = { 0 }; + + parse_opts(argc, argv, &p); + + if (p.target_token) + get_mptcpinfo(p.target_token); + + if (p.subflow_addrs[0] != '\0') + get_subflow_info(p.subflow_addrs); + + return 0; +} + diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 218aac467321..8e8f6441ad8b 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -72,13 +72,22 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? 
service : "", errstr); @@ -91,7 +100,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -136,7 +145,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -493,6 +502,7 @@ static int server(int unixfd) process_one_client(r, unixfd); + close(fd); return 0; } @@ -571,8 +581,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(unixfds[1]); + if (s == 0) { + close(unixfds[0]); + ret = server(unixfds[1]); + close(unixfds[1]); + return ret; + } close(unixfds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 108aeeb84ef1..b2e6e548f796 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,7 +8,7 @@ # ShellCheck incorrectly believes that most of the code here is unreachable # because it's invoked by variable name, see how the "tests" array is used -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 . 
"$(dirname "${0}")/mptcp_lib.sh" @@ -23,6 +23,7 @@ tmpfile="" cout="" err="" capout="" +cappid="" ns1="" ns2="" iptables="iptables" @@ -61,6 +62,29 @@ unset sflags unset fastclose unset fullmesh unset speed +unset bind_addr +unset join_syn_rej +unset join_csum_ns1 +unset join_csum_ns2 +unset join_fail_nr +unset join_rst_nr +unset join_infi_nr +unset join_corrupted_pkts +unset join_syn_tx +unset join_create_err +unset join_bind_err +unset join_connect_err + +unset fb_ns1 +unset fb_ns2 +unset fb_infinite_map_tx +unset fb_dss_corruption +unset fb_simult_conn +unset fb_mpc_passive +unset fb_mpc_active +unset fb_mpc_data +unset fb_md5_sig +unset fb_dss # generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) || # (ip6 && (ip6[74] & 0xf0) == 0x30)'" @@ -196,6 +220,22 @@ print_skip() mptcp_lib_pr_skip "${@}" } +# $1: check name; $2: rc +print_results() +{ + local check="${1}" + local rc=${2} + + print_check "${check}" + if [ ${rc} = ${KSFT_PASS} ]; then + print_ok + elif [ ${rc} = ${KSFT_SKIP} ]; then + print_skip + else + fail_test "see above" + fi +} + # [ $1: fail msg ] mark_as_skipped() { @@ -319,6 +359,7 @@ reset_with_add_addr_timeout() tables="${ip6tables}" fi + # set a maximum, to avoid too long timeout with exponential backoff ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 if ! 
ip netns exec $ns2 $tables -A OUTPUT -p tcp \ @@ -337,7 +378,7 @@ reset_with_checksum() local ns1_enable=$1 local ns2_enable=$2 - reset "checksum test ${1} ${2}" || return 1 + reset "checksum test ${ns1_enable} ${ns2_enable}" || return 1 ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable @@ -420,12 +461,17 @@ reset_with_fail() fi } +start_events() +{ + mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid + mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid +} + reset_with_events() { reset "${1}" || return 1 - mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid - mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid + start_events } reset_with_tcp_filter() @@ -436,9 +482,10 @@ reset_with_tcp_filter() local ns="${!1}" local src="${2}" local target="${3}" + local chain="${4:-INPUT}" if ! ip netns exec "${ns}" ${iptables} \ - -A INPUT \ + -A "${chain}" \ -s "${src}" \ -p tcp \ -j "${target}"; then @@ -599,6 +646,27 @@ wait_mpj() done } +wait_ll_ready() +{ + local ns="${1}" + + local i + for i in $(seq 50); do + ip -n "${ns}" -6 addr show scope link | grep "inet6 fe80" | + grep -qw "tentative" || break + sleep 0.1 + done +} + +get_ll_addr() +{ + local ns="${1}" + local iface="${2}" + + ip -n "${ns}" -6 addr show dev "${iface}" scope link | + grep "inet6 fe80" | sed 's#.*\(fe80::.*\)/.*#\1#' +} + kill_events_pids() { mptcp_lib_kill_wait $evts_ns1_pid @@ -661,7 +729,7 @@ pm_nl_check_endpoint() done if [ -z "${id}" ]; then - test_fail "bad test - missing endpoint id" + fail_test "bad test - missing endpoint id" return fi @@ -833,7 +901,7 @@ chk_cestab_nr() local cestab=$2 local count - print_check "cestab $cestab" + print_check "currently established: $cestab" count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab") if [ -z "$count" ]; then print_skip @@ -855,45 +923,68 @@ check_cestab() fi } -do_transfer() +cond_start_capture() { - local listener_ns="$1" - local connector_ns="$2" - 
local cl_proto="$3" - local srv_proto="$4" - local connect_addr="$5" - - local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1)) - local cappid - local FAILING_LINKS=${FAILING_LINKS:-""} - local fastclose=${fastclose:-""} - local speed=${speed:-"fast"} + local ns="$1" - :> "$cout" - :> "$sout" :> "$capout" if $capture; then - local capuser - if [ -z $SUDO_USER ] ; then + local capuser capfile + if [ -z $SUDO_USER ]; then capuser="" else capuser="-Z $SUDO_USER" fi - capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}") + capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "$ns") echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile" - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + ip netns exec "$ns" tcpdump -i any -s 65535 -B 32768 $capuser -w "$capfile" > "$capout" 2>&1 & cappid=$! sleep 1 fi +} + +cond_stop_capture() +{ + if $capture; then + sleep 1 + kill $cappid + cat "$capout" + fi +} + +get_port() +{ + echo "$((10000 + MPTCP_LIB_TEST_COUNTER - 1))" +} + +do_transfer() +{ + local listener_ns="$1" + local connector_ns="$2" + local cl_proto="$3" + local srv_proto="$4" + local connect_addr="$5" + local port + + local FAILING_LINKS=${FAILING_LINKS:-""} + local fastclose=${fastclose:-""} + local speed=${speed:-"fast"} + local bind_addr=${bind_addr:-"::"} + local listener_in="${sin}" + local connector_in="${cin}" + port=$(get_port) + + :> "$cout" + :> "$sout" - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat -n - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat -n + cond_start_capture ${listener_ns} + + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" local extra_args if [ $speed = "fast" ]; then @@ -931,42 +1022,40 @@ do_transfer() extra_srv_args="$extra_args $extra_srv_args" if [ "$test_linkfail" -gt 1 ];then - timeout ${timeout_test} \ - ip netns exec 
${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sinfail" > "$sout" & - else - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \ - $extra_srv_args "::" < "$sin" > "$sout" & + listener_in="${sinfail}" fi + ip netns exec ${listener_ns} \ + ./mptcp_connect -t ${timeout_poll} -l -p ${port} -s ${srv_proto} \ + ${extra_srv_args} "${bind_addr}" < "${listener_in}" > "${sout}" & local spid=$! mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" extra_cl_args="$extra_args $extra_cl_args" if [ "$test_linkfail" -eq 0 ];then - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr < "$cin" > "$cout" & elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then + connector_in="${cinsent}" ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \ tee "$cinsent" | \ - timeout ${timeout_test} \ ip netns exec ${connector_ns} \ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ $extra_cl_args $connect_addr > "$cout" & else + connector_in="${cinsent}" tee "$cinsent" < "$cinfail" | \ - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ - $extra_cl_args $connect_addr > "$cout" & + ip netns exec ${connector_ns} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \ + $extra_cl_args $connect_addr > "$cout" & fi local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! 
+ pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr check_cestab $listener_ns $connector_ns @@ -975,49 +1064,29 @@ do_transfer() wait $spid local rets=$? - if $capture; then - sleep 1 - kill $cappid + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 fi - NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \ - nstat | grep Tcp > /tmp/${listener_ns}.out - NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \ - nstat | grep Tcp > /tmp/${connector_ns}.out + cond_stop_capture - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then - fail_test "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - cat /tmp/${listener_ns}.out - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" - cat /tmp/${connector_ns}.out + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" - cat "$capout" + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then + fail_test "client exit code $retc, server $rets" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" return 1 fi - if [ "$test_linkfail" -gt 1 ];then - check_transfer $sinfail $cout "file received by client" $trunc_size - else - check_transfer $sin $cout "file received by client" $trunc_size - fi + check_transfer $listener_in $cout "file received by client" $trunc_size retc=$? - if [ "$test_linkfail" -eq 0 ];then - check_transfer $cin $sout "file received by server" $trunc_size - else - check_transfer $cinsent $sout "file received by server" $trunc_size - fi + check_transfer $connector_in $sout "file received by server" $trunc_size rets=$? 
- if [ $retc -eq 0 ] && [ $rets -eq 0 ];then - cat "$capout" - return 0 - fi - - cat "$capout" - return 1 + [ $retc -eq 0 ] && [ $rets -eq 0 ] } make_file() @@ -1083,12 +1152,20 @@ run_tests() do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} } +_dump_stats() +{ + local ns="${1}" + local side="${2}" + + mptcp_lib_print_err "${side} ns stats (${ns2})" + mptcp_lib_pr_nstat "${ns}" + echo +} + dump_stats() { - echo Server ns stats - ip netns exec $ns1 nstat -as | grep Tcp - echo Client ns stats - ip netns exec $ns2 nstat -as | grep Tcp + _dump_stats "${ns1}" "Server" + _dump_stats "${ns2}" "Client" } chk_csum_nr() @@ -1109,28 +1186,29 @@ chk_csum_nr() csum_ns2=${csum_ns2:1} fi - print_check "sum" + print_check "checksum server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns1" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns1" ]; then extra_msg+=" ns1=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } || - { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns1" else print_ok fi - print_check "csum" + + print_check "checksum client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr") - if [ "$count" != "$csum_ns2" ]; then + if [ -n "$count" ] && [ "$count" != "$csum_ns2" ]; then extra_msg+=" ns2=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } || - { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then + { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then fail_test "got $count data checksum error[s] expected $csum_ns2" else print_ok @@ -1147,6 +1225,8 @@ chk_fail_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 + local tx="server" + local rx="client" 
local extra_msg="" local allow_tx_lost=0 local allow_rx_lost=0 @@ -1154,7 +1234,8 @@ chk_fail_nr() if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi if [[ "${fail_tx}" = "-"* ]]; then @@ -1166,29 +1247,29 @@ chk_fail_nr() fail_rx=${fail_rx:1} fi - print_check "ftx" + print_check "fail tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx") - if [ "$count" != "$fail_tx" ]; then - extra_msg+=",tx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_tx" ]; then + extra_msg+=" tx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] TX expected $fail_tx" else print_ok fi - print_check "failrx" + print_check "fail rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx") - if [ "$count" != "$fail_rx" ]; then - extra_msg+=",rx=$count" + if [ -n "$count" ] && [ "$count" != "$fail_rx" ]; then + extra_msg+=" rx=$count" fi if [ -z "$count" ]; then print_skip elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } || - { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then + { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then fail_test "got $count MP_FAIL[s] RX expected $fail_rx" else print_ok @@ -1205,37 +1286,35 @@ chk_fclose_nr() local count local ns_tx=$ns2 local ns_rx=$ns1 - local extra_msg="" + local tx="client" + local rx="server" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns1 ns_rx=$ns2 - extra_msg="invert" + tx="server" + rx="client" fi - print_check "ctx" + print_check "fast close tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_tx" ]; then - extra_msg+=",tx=$count" fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx" else 
print_ok fi - print_check "fclzrx" + print_check "fast close rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$fclose_rx" ]; then - extra_msg+=",rx=$count" fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx" else print_ok fi - - print_info "$extra_msg" } chk_rst_nr() @@ -1246,15 +1325,17 @@ chk_rst_nr() local count local ns_tx=$ns1 local ns_rx=$ns2 - local extra_msg="" + local tx="server" + local rx="client" if [[ $ns_invert = "invert" ]]; then ns_tx=$ns2 ns_rx=$ns1 - extra_msg="invert" + tx="client" + rx="server" fi - print_check "rtx" + print_check "reset tx ${tx}" count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx") if [ -z "$count" ]; then print_skip @@ -1266,7 +1347,7 @@ chk_rst_nr() print_ok fi - print_check "rstrx" + print_check "reset rx ${rx}" count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx") if [ -z "$count" ]; then print_skip @@ -1277,8 +1358,6 @@ chk_rst_nr() else print_ok fi - - print_info "$extra_msg" } chk_infi_nr() @@ -1287,7 +1366,7 @@ chk_infi_nr() local infi_rx=$2 local count - print_check "itx" + print_check "infi tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx") if [ -z "$count" ]; then print_skip @@ -1297,7 +1376,7 @@ chk_infi_nr() print_ok fi - print_check "infirx" + print_check "infi rx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx") if [ -z "$count" ]; then print_skip @@ -1308,17 +1387,176 @@ chk_infi_nr() fi } +chk_join_tx_nr() +{ + local syn_tx=${join_syn_tx:-0} + local create=${join_create_err:-0} + local bind=${join_bind_err:-0} + local connect=${join_connect_err:-0} + local rc=${KSFT_PASS} + local count + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_tx" ]; then + rc=${KSFT_FAIL} + print_check "syn tx" + fail_test "got $count JOIN[s] syn tx expected $syn_tx" + fi + + count=$(mptcp_lib_get_counter ${ns2} 
"MPTcpExtMPJoinSynTxCreatSkErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$create" ]; then + rc=${KSFT_FAIL} + print_check "syn tx create socket error" + fail_test "got $count JOIN[s] syn tx create socket error expected $create" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxBindErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$bind" ]; then + rc=${KSFT_FAIL} + print_check "syn tx bind error" + fail_test "got $count JOIN[s] syn tx bind error expected $bind" + fi + + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynTxConnectErr") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$connect" ]; then + rc=${KSFT_FAIL} + print_check "syn tx connect error" + fail_test "got $count JOIN[s] syn tx connect error expected $connect" + fi + + print_results "join Tx" ${rc} +} + +chk_fallback_nr() +{ + local infinite_map_tx=${fb_infinite_map_tx:-0} + local dss_corruption=${fb_dss_corruption:-0} + local simult_conn=${fb_simult_conn:-0} + local mpc_passive=${fb_mpc_passive:-0} + local mpc_active=${fb_mpc_active:-0} + local mpc_data=${fb_mpc_data:-0} + local md5_sig=${fb_md5_sig:-0} + local dss=${fb_dss:-0} + local rc=${KSFT_PASS} + local ns=$1 + local count + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtInfiniteMapTx") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$infinite_map_tx" ]; then + rc=${KSFT_FAIL} + print_check "$ns infinite map tx fallback" + fail_test "got $count infinite map tx fallback[s] in $ns expected $infinite_map_tx" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDSSCorruptionFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss_corruption" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss corruption fallback" + fail_test "got $count dss corruption fallback[s] in $ns expected $dss_corruption" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtSimultConnectFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} 
+ elif [ "$count" != "$simult_conn" ]; then + rc=${KSFT_FAIL} + print_check "$ns simult conn fallback" + fail_test "got $count simult conn fallback[s] in $ns expected $simult_conn" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_passive" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc passive fallback" + fail_test "got $count mpc passive fallback[s] in $ns expected $mpc_passive" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableFallbackSYNACK") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_active" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc active fallback" + fail_test "got $count mpc active fallback[s] in $ns expected $mpc_active" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMPCapableDataFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$mpc_data" ]; then + rc=${KSFT_FAIL} + print_check "$ns mpc data fallback" + fail_test "got $count mpc data fallback[s] in $ns expected $mpc_data" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtMD5SigFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$md5_sig" ]; then + rc=${KSFT_FAIL} + print_check "$ns MD5 Sig fallback" + fail_test "got $count MD5 Sig fallback[s] in $ns expected $md5_sig" + fi + + count=$(mptcp_lib_get_counter ${!ns} "MPTcpExtDssFallback") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$dss" ]; then + rc=${KSFT_FAIL} + print_check "$ns dss fallback" + fail_test "got $count dss fallback[s] in $ns expected $dss" + fi + + return $rc +} + +chk_fallback_nr_all() +{ + local netns=("ns1" "ns2") + local fb_ns=("fb_ns1" "fb_ns2") + local rc=${KSFT_PASS} + + for i in 0 1; do + if [ -n "${!fb_ns[i]}" ]; then + eval "${!fb_ns[i]}" \ + chk_fallback_nr ${netns[i]} || rc=${?} + else + chk_fallback_nr ${netns[i]} || rc=${?} + fi + done + + if [ "${rc}" != "${KSFT_PASS}" ]; then + 
print_results "fallback" ${rc} + fi +} + chk_join_nr() { local syn_nr=$1 local syn_ack_nr=$2 local ack_nr=$3 - local csum_ns1=${4:-0} - local csum_ns2=${5:-0} - local fail_nr=${6:-0} - local rst_nr=${7:-0} - local infi_nr=${8:-0} - local corrupted_pkts=${9:-0} + local syn_rej=${join_syn_rej:-0} + local csum_ns1=${join_csum_ns1:-0} + local csum_ns2=${join_csum_ns2:-0} + local fail_nr=${join_fail_nr:-0} + local rst_nr=${join_rst_nr:-0} + local infi_nr=${join_infi_nr:-0} + local corrupted_pkts=${join_corrupted_pkts:-0} + local rc=${KSFT_PASS} local count local with_cookie @@ -1326,43 +1564,73 @@ chk_join_nr() print_info "${corrupted_pkts} corrupted pkts" fi - print_check "syn" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_nr" ]; then - fail_test "got $count JOIN[s] syn expected $syn_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "syn rx" + fail_test "got $count JOIN[s] syn rx expected $syn_nr" fi - print_check "synack" with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies) count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$syn_ack_nr" ]; then # simult connections exceeding the limit with cookie enabled could go up to # synack validation as the conn limit can be enforced reliably only after # the subflow creation - if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then - print_ok - else - fail_test "got $count JOIN[s] synack expected $syn_ack_nr" + if [ "$with_cookie" != 2 ] || [ "$count" -le "$syn_ack_nr" ] || [ "$count" -gt "$syn_nr" ]; then + rc=${KSFT_FAIL} + print_check "synack rx" + fail_test "got $count JOIN[s] synack rx expected $syn_ack_nr" fi - else - print_ok fi - print_check "ack" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" 
!= "0" ]; then + rc=${KSFT_FAIL} + print_check "synack HMAC" + fail_test "got $count JOIN[s] synack HMAC failure expected 0" + fi + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx") if [ -z "$count" ]; then - print_skip + rc=${KSFT_SKIP} elif [ "$count" != "$ack_nr" ]; then - fail_test "got $count JOIN[s] ack expected $ack_nr" - else - print_ok + rc=${KSFT_FAIL} + print_check "ack rx" + fail_test "got $count JOIN[s] ack rx expected $ack_nr" fi + + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckHMacFailure") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "0" ]; then + rc=${KSFT_FAIL} + print_check "ack HMAC" + fail_test "got $count JOIN[s] ack HMAC failure expected 0" + fi + + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected") + if [ -z "$count" ]; then + rc=${KSFT_SKIP} + elif [ "$count" != "$syn_rej" ]; then + rc=${KSFT_FAIL} + print_check "syn rejected" + fail_test "got $count JOIN[s] syn rejected expected $syn_rej" + fi + + print_results "join Rx" ${rc} + + join_syn_tx="${join_syn_tx:-${syn_nr}}" \ + chk_join_tx_nr + + chk_fallback_nr_all + if $validate_checksum; then chk_csum_nr $csum_ns1 $csum_ns2 chk_fail_nr $fail_nr $fail_nr @@ -1415,30 +1683,39 @@ chk_add_nr() local add_nr=$1 local echo_nr=$2 local port_nr=${3:-0} - local syn_nr=${4:-$port_nr} - local syn_ack_nr=${5:-$port_nr} - local ack_nr=${6:-$port_nr} - local mis_syn_nr=${7:-0} - local mis_ack_nr=${8:-0} + local ns_invert=${4:-""} + local syn_nr=$port_nr + local syn_ack_nr=$port_nr + local ack_nr=$port_nr + local mis_syn_nr=0 + local mis_ack_nr=0 + local ns_tx=$ns1 + local ns_rx=$ns2 + local tx="" + local rx="" local count - local timeout - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) + if [[ $ns_invert = "invert" ]]; then + ns_tx=$ns2 + ns_rx=$ns1 + tx=" client" + rx=" server" + fi - print_check "add" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr") + print_check "add addr rx${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} 
"MPTcpExtAddAddr") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_nr" ] && + { [ "$add_nr" -eq 0 ] || [ "$count" -lt "$add_nr" ]; }; then fail_test "got $count ADD_ADDR[s] expected $add_nr" else print_ok fi - print_check "echo" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd") + print_check "add addr echo rx${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtEchoAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$echo_nr" ]; then @@ -1448,8 +1725,8 @@ chk_add_nr() fi if [ $port_nr -gt 0 ]; then - print_check "pt" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd") + print_check "add addr rx with port${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtPortAdd") if [ -z "$count" ]; then print_skip elif [ "$count" != "$port_nr" ]; then @@ -1458,8 +1735,8 @@ chk_add_nr() print_ok fi - print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx") + print_check "syn rx port${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_nr" ]; then @@ -1469,8 +1746,8 @@ chk_add_nr() print_ok fi - print_check "synack" - count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx") + print_check "synack rx port${rx}" + count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPJoinPortSynAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$syn_ack_nr" ]; then @@ -1480,8 +1757,8 @@ chk_add_nr() print_ok fi - print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx") + print_check "ack rx port${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPJoinPortAckRx") if [ -z "$count" ]; then print_skip elif [ 
"$count" != "$ack_nr" ]; then @@ -1491,8 +1768,8 @@ chk_add_nr() print_ok fi - print_check "syn" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx") + print_check "syn rx port mismatch${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortSynRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_syn_nr" ]; then @@ -1502,8 +1779,8 @@ chk_add_nr() print_ok fi - print_check "ack" - count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx") + print_check "ack rx port mismatch${tx}" + count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMismatchPortAckRx") if [ -z "$count" ]; then print_skip elif [ "$count" != "$mis_ack_nr" ]; then @@ -1519,24 +1796,21 @@ chk_add_tx_nr() { local add_tx_nr=$1 local echo_tx_nr=$2 - local timeout local count - timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout) - - print_check "add TX" + print_check "add addr tx" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx") if [ -z "$count" ]; then print_skip - # if the test configured a short timeout tolerate greater then expected - # add addrs options, due to retransmissions - elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then + # Tolerate more ADD_ADDR then expected (if any), due to retransmissions + elif [ "$count" != "$add_tx_nr" ] && + { [ "$add_tx_nr" -eq 0 ] || [ "$count" -lt "$add_tx_nr" ]; }; then fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr" else print_ok fi - print_check "echo TX" + print_check "add addr echo tx" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx") if [ -z "$count" ]; then print_skip @@ -1556,6 +1830,8 @@ chk_rm_nr() local count local addr_ns=$ns1 local subflow_ns=$ns2 + local addr="server" + local subflow="client" local extra_msg="" shift 2 @@ -1565,16 +1841,14 @@ chk_rm_nr() shift done - if [ -z $invert ]; then - addr_ns=$ns1 - subflow_ns=$ns2 - elif [ $invert = "true" ]; then + if [ "$invert" = "true" ]; then addr_ns=$ns2 subflow_ns=$ns1 
- extra_msg="invert" + addr="client" + subflow="server" fi - print_check "rm" + print_check "rm addr rx ${addr}" count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr") if [ -z "$count" ]; then print_skip @@ -1584,7 +1858,7 @@ chk_rm_nr() print_ok fi - print_check "rmsf" + print_check "rm subflow ${subflow}" count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow") if [ -z "$count" ]; then print_skip @@ -1598,7 +1872,7 @@ chk_rm_nr() count=$((count + cnt)) if [ "$count" != "$rm_subflow_nr" ]; then suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]" - extra_msg+=" simult" + extra_msg="simult" fi if [ $count -ge "$rm_subflow_nr" ] && \ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then @@ -1619,7 +1893,7 @@ chk_rm_tx_nr() { local rm_addr_tx_nr=$1 - print_check "rm TX" + print_check "rm addr tx client" count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx") if [ -z "$count" ]; then print_skip @@ -1634,9 +1908,11 @@ chk_prio_nr() { local mp_prio_nr_tx=$1 local mp_prio_nr_rx=$2 + local mpj_syn=$3 + local mpj_syn_ack=$4 local count - print_check "ptx" + print_check "mp_prio tx server" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx") if [ -z "$count" ]; then print_skip @@ -1646,7 +1922,7 @@ chk_prio_nr() print_ok fi - print_check "prx" + print_check "mp_prio rx client" count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx") if [ -z "$count" ]; then print_skip @@ -1655,6 +1931,26 @@ chk_prio_nr() else print_ok fi + + print_check "syn backup" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn" ]; then + fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn" + else + print_ok + fi + + print_check "synack backup" + count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$mpj_syn_ack" ]; then + fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack" + else + print_ok + 
fi } chk_subflow_nr() @@ -1819,7 +2115,8 @@ subflows_tests() pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # subflow @@ -1848,7 +2145,8 @@ subflows_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 2 1 + join_syn_rej=1 \ + chk_join_nr 2 2 1 fi # single subflow, dev @@ -1869,9 +2167,11 @@ subflows_error_tests() pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.12.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 + join_bind_err=1 \ + chk_join_nr 0 0 0 fi # multiple subflows, with subflow creation error @@ -1883,7 +2183,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, with subflow timeout on MPJ @@ -1895,7 +2196,8 @@ subflows_error_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi # multiple subflows, check that the endpoint corresponding to @@ -1916,7 +2218,8 @@ subflows_error_tests() # additional subflow could be created only if the PM select # the later endpoint, skipping the already used one - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 fi } @@ -1955,6 +2258,21 @@ signal_address_tests() chk_add_nr 1 1 fi + # uncommon: subflow and signal flags on the same endpoint + # or because the user wrongly picked both, but still expects the client + # to create additional subflows + if reset "subflow and signal together"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns2 10.0.3.2 flags signal,subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + 
chk_add_nr 1 1 0 invert # only initiated by ns2 + chk_add_nr 0 0 0 # none initiated by ns1 + chk_rst_nr 0 0 invert # no RST sent by the client + chk_rst_nr 0 0 # no RST sent by the server + fi + # accept and use add_addr with additional subflows if reset "multiple subflows and signal"; then pm_nl_set_limits $ns1 0 3 @@ -1974,7 +2292,8 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.4.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 chk_add_nr 3 3 fi @@ -1986,8 +2305,10 @@ signal_address_tests() pm_nl_add_endpoint $ns1 10.0.3.1 flags signal pm_nl_add_endpoint $ns1 10.0.14.1 flags signal pm_nl_set_limits $ns2 3 3 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 fi @@ -2023,6 +2344,74 @@ signal_address_tests() fi } +laminar_endp_tests() +{ + # no laminar endpoints: routing rules are used + if reset_with_tcp_filter "without a laminar endpoint" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # laminar endpoints: this endpoint is used + if reset_with_tcp_filter "with a laminar endpoint" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi + + # laminar endpoints: these endpoints are used + if reset_with_tcp_filter "with multiple laminar endpoints" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has 
"mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 dead:beef:3::2 flags laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + pm_nl_add_endpoint $ns2 10.0.4.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + chk_add_nr 2 2 + fi + + # laminar endpoints: only one endpoint is used + if reset_with_tcp_filter "single laminar endpoint" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 2 2 + fi + + # laminar endpoints: subflow and laminar flags + if reset_with_tcp_filter "sublow + laminar endpoints" ns1 10.0.2.2 REJECT && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + pm_nl_set_limits $ns1 0 4 + pm_nl_set_limits $ns2 2 4 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,laminar + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,laminar + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + fi +} + link_failure_tests() { # accept and use add_addr with additional subflows and link loss @@ -2155,7 +2544,8 @@ add_addr_timeout_tests() pm_nl_set_limits $ns2 2 2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 \ + chk_join_nr 1 1 1 chk_add_nr 8 0 fi } @@ -2166,7 +2556,7 @@ remove_tests() if reset "remove single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 @@ -2179,8 
+2569,8 @@ remove_tests() if reset "remove multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-2 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2191,7 +2581,7 @@ remove_tests() # single address, remove if reset "remove single address"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 1 addr_nr_ns1=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2204,9 +2594,9 @@ remove_tests() # subflow and signal, remove if reset "remove subflow and signal"; then pm_nl_set_limits $ns1 0 2 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 2 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2218,10 +2608,10 @@ remove_tests() # subflows and signal, remove if reset "remove subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2233,9 +2623,9 @@ remove_tests() # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 
flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2248,14 +2638,15 @@ remove_tests() # invalid addresses remove if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup # broadcast IP: no packet for this address will be received on ns1 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=2 join_connect_err=1 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2264,10 +2655,10 @@ remove_tests() # subflows and signal, flush if reset "flush subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2280,9 +2671,9 @@ remove_tests() if reset "flush subflows"; then pm_nl_set_limits $ns1 3 3 pm_nl_set_limits $ns2 3 3 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags 
subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2299,9 +2690,9 @@ remove_tests() # addresses flush if reset "flush addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2314,13 +2705,14 @@ remove_tests() # invalid addresses flush if reset "flush invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 + join_syn_tx=3 \ + chk_join_nr 1 1 1 chk_add_nr 3 3 chk_rm_nr 3 1 invert chk_rst_nr 0 0 @@ -2584,7 +2976,11 @@ mixed_tests() pm_nl_add_endpoint $ns1 10.0.1.1 flags signal speed=slow \ run_tests $ns1 $ns2 dead:beef:2::1 - chk_join_nr 1 1 1 + if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_fullmesh_max$"; then + chk_join_nr 0 0 0 + else + chk_join_nr 1 1 1 + fi fi # fullmesh still tries to create all the possibly subflows with @@ -2612,33 +3008,46 @@ backup_tests() sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 fi # single address, backup if reset "single address, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup + 
pm_nl_set_limits $ns2 1 1 + sflags=nobackup speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + chk_prio_nr 1 0 0 1 + fi + + # single address, switch to backup + if reset "single address, switch to backup" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 1 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal pm_nl_set_limits $ns2 1 1 sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi # single address with port, backup if reset "single address with port, backup" && continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100 pm_nl_set_limits $ns2 1 1 - sflags=backup speed=slow \ + sflags=nobackup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 chk_add_nr 1 1 - chk_prio_nr 1 1 + chk_prio_nr 1 0 0 1 fi if reset "mpc backup" && @@ -2647,17 +3056,26 @@ backup_tests() speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc backup both sides" && continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then - pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup + + # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup) + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow + # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup) + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path + speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_join_nr 2 2 2 + chk_prio_nr 1 1 1 1 fi if reset "mpc switch to backup" && @@ -2666,7 +3084,7 @@ 
backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 0 1 + chk_prio_nr 0 1 0 0 fi if reset "mpc switch to backup both sides" && @@ -2676,7 +3094,7 @@ backup_tests() sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 - chk_prio_nr 1 1 + chk_prio_nr 1 1 0 0 fi } @@ -2709,6 +3127,32 @@ verify_listener_events() fail_test } +chk_mpc_endp_attempt() +{ + local retl=$1 + local attempts=$2 + + print_check "Connect" + + if [ ${retl} = 124 ]; then + fail_test "timeout on connect" + elif [ ${retl} = 0 ]; then + fail_test "unexpected successful connect" + else + print_ok + + print_check "Attempts" + count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPCapableEndpAttempt") + if [ -z "$count" ]; then + print_skip + elif [ "$count" != "$attempts" ]; then + fail_test "got ${count} MPC attempt[s] on port-based endpoint, expected ${attempts}" + else + print_ok + fi + fi +} + add_addr_ports_tests() { # signal address with port @@ -2799,6 +3243,149 @@ add_addr_ports_tests() chk_join_nr 2 2 2 chk_add_nr 2 2 2 fi + + if reset "port-based signal endpoint must not accept mpc"; then + local port retl count + port=$(get_port) + + cond_start_capture ${ns1} + pm_nl_add_endpoint ${ns1} 10.0.2.1 flags signal port ${port} + mptcp_lib_wait_local_port_listen ${ns1} ${port} + + timeout 1 ip netns exec ${ns2} \ + ./mptcp_connect -t ${timeout_poll} -p $port -s MPTCP 10.0.2.1 >/dev/null 2>&1 + retl=$? 
+ cond_stop_capture + + chk_mpc_endp_attempt ${retl} 1 + fi +} + +bind_tests() +{ + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v4, no join v4"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # bind to one address should not allow extra subflows to other addresses + if reset "bind main address v6, no join v6"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + join_syn_tx=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v4"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP 10.0.2.1 & + extra_bind=$! + + bind_addr="10.0.1.1" \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses + if reset "multiple bind to allow joins v6"; then + local extra_bind + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal + + # Launching another app listening on a different address + # Note: it could be a totally different app, e.g. nc, socat, ... + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP dead:beef:2::1 & + extra_bind=$! 
+ + bind_addr="dead:beef:1::1" \ + run_tests $ns1 $ns2 dead:beef:1::1 + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to other addresses: v6 LL case + if reset "multiple bind to allow joins v6 link-local routing"; then + local extra_bind ns1ll1 ns1ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! + + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + # it is not possible to connect to the announced LL addr without + # specifying the outgoing interface. + join_connect_err=1 \ + chk_join_nr 0 0 0 + chk_add_nr 1 1 + + kill ${extra_bind} + fi + + # multiple binds to allow extra subflows to v6 LL addresses: laminar + if reset "multiple bind to allow joins v6 link-local laminar" && + continue_if mptcp_lib_kallsyms_has "mptcp_pm_get_endp_laminar_max$"; then + local extra_bind ns1ll1 ns1ll2 ns2ll2 + + ns1ll1="$(get_ll_addr $ns1 ns1eth1)" + ns1ll2="$(get_ll_addr $ns1 ns1eth2)" + ns2ll2="$(get_ll_addr $ns2 ns2eth2)" + + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 2 2 + pm_nl_add_endpoint $ns1 "${ns1ll2}" flags signal + pm_nl_add_endpoint $ns2 "${ns2ll2}" flags laminar dev ns2eth2 + + wait_ll_ready $ns1 # to be able to bind + wait_ll_ready $ns2 # also needed to bind on the client side + ip netns exec ${ns1} ./mptcp_connect -l -t -1 -p "$(get_port)" \ + -s MPTCP "${ns1ll2}%ns1eth2" & + extra_bind=$! 
+ + bind_addr="${ns1ll1}%ns1eth1" \ + run_tests $ns1 $ns2 "${ns1ll1}%ns2eth1" + chk_join_nr 1 1 1 + chk_add_nr 1 1 + + kill ${extra_bind} + fi } syncookies_tests() @@ -2829,7 +3416,8 @@ syncookies_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 2 1 1 + join_syn_rej=1 \ + chk_join_nr 2 1 1 fi # test signal address with cookies @@ -2868,37 +3456,16 @@ syncookies_tests() checksum_tests() { - # checksum test 0 0 - if reset_with_checksum 0 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 1 - if reset_with_checksum 1 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 0 1 - if reset_with_checksum 0 1; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi - - # checksum test 1 0 - if reset_with_checksum 1 0; then - pm_nl_set_limits $ns1 0 1 - pm_nl_set_limits $ns2 0 1 - run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 - fi + local checksum_enable + for checksum_enable in "0 0" "1 1" "0 1" "1 0"; do + # checksum test 0 0, 1 1, 0 1, 1 0 + if reset_with_checksum ${checksum_enable}; then + pm_nl_set_limits $ns1 0 1 + pm_nl_set_limits $ns2 0 1 + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 0 0 0 + fi + done } deny_join_id0_tests() @@ -2962,6 +3529,17 @@ deny_join_id0_tests() run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 fi + + # default limits, server deny join id 0 + signal + if reset_with_allow_join_id0 "default limits, server deny join id 0" 0 1; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 0 2 + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 2 2 2 + fi } fullmesh_tests() @@ -2987,6 
+3565,9 @@ fullmesh_tests() pm_nl_set_limits $ns1 1 3 pm_nl_set_limits $ns2 1 3 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then + pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh + fi fullmesh=1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -3053,7 +3634,7 @@ fullmesh_tests() addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi @@ -3066,7 +3647,7 @@ fullmesh_tests() sflags=nobackup,nofullmesh speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 - chk_prio_nr 0 1 + chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi } @@ -3074,7 +3655,6 @@ fullmesh_tests() fastclose_tests() { if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then - MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=client \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 0 0 0 @@ -3083,10 +3663,10 @@ fastclose_tests() fi if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then - MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=1024 fastclose=server \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 0 0 0 1 + join_rst_nr=1 \ + chk_join_nr 0 0 0 chk_fclose_nr 1 1 invert chk_rst_nr 1 1 fi @@ -3105,7 +3685,11 @@ fail_tests() MPTCP_LIB_SUBTEST_FLAKY=1 test_linkfail=128 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)" + join_csum_ns1=+1 join_csum_ns2=+0 \ + join_fail_nr=1 join_rst_nr=0 join_infi_nr=1 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + fb_ns1="fb_dss=1" fb_ns2="fb_infinite_map_tx=1" \ + chk_join_nr 0 0 0 chk_fail_nr 1 -1 invert fi @@ -3118,7 +3702,10 @@ fail_tests() pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow test_linkfail=1024 \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)" + join_csum_ns1=1 join_csum_ns2=0 \ + join_fail_nr=1 join_rst_nr=1 join_infi_nr=0 \ + join_corrupted_pkts="$(pedit_action_pkts)" \ + 
chk_join_nr 1 1 1 fi } @@ -3260,6 +3847,36 @@ userspace_pm_chk_get_addr() fi } +# $1: ns ; $2: event type ; $3: count +chk_evt_nr() +{ + local ns=${1} + local evt_name="${2}" + local exp="${3}" + + local evts="${evts_ns1}" + local evt="${!evt_name}" + local count + + evt_name="${evt_name:16}" # without MPTCP_LIB_EVENT_ + [ "${ns}" == "ns2" ] && evts="${evts_ns2}" + + print_check "event ${ns} ${evt_name} (${exp})" + + if [[ "${evt_name}" = "LISTENER_"* ]] && + ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then + print_skip "event not supported" + return + fi + + count=$(grep -cw "type:${evt}" "${evts}") + if [ "${count}" != "${exp}" ]; then + fail_test "got ${count} events, expected ${exp}" + else + print_ok + fi +} + userspace_tests() { # userspace pm type prevents add_addr @@ -3294,7 +3911,8 @@ userspace_tests() pm_nl_set_limits $ns2 1 1 pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 fi # userspace pm type does not send join @@ -3317,8 +3935,9 @@ userspace_tests() pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow sflags=backup speed=slow \ run_tests $ns1 $ns2 10.0.1.1 - chk_join_nr 1 1 0 - chk_prio_nr 0 0 + join_syn_rej=1 \ + chk_join_nr 1 1 0 + chk_prio_nr 0 0 0 0 fi # userspace pm type prevents rm_addr @@ -3340,8 +3959,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! 
wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3356,18 +3975,16 @@ userspace_tests() "signal" userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1" userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1" - userspace_pm_rm_addr $ns1 10 userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" \ - "id 20 flags signal 10.0.3.1" "after rm_addr 10" + "id 20 flags signal 10.0.3.1" "after rm_sf 10" userspace_pm_rm_addr $ns1 20 - userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20" - chk_rm_nr 2 2 invert + chk_rm_nr 1 1 invert chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create destroy subflow @@ -3375,8 +3992,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! 
wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3387,16 +4004,15 @@ userspace_tests() "id 20 flags subflow 10.0.3.2" \ "subflow" userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2" - userspace_pm_rm_addr $ns2 20 userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED userspace_pm_chk_dump_addr "${ns2}" \ "" \ - "after rm_addr 20" - chk_rm_nr 1 1 + "after rm_sf 20" + chk_rm_nr 0 1 chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create id 0 subflow @@ -3404,8 +4020,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 chk_mptcp_info subflows 0 subflows 0 @@ -3417,7 +4033,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm remove initial subflow @@ -3425,8 +4041,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! 
wait_mpj $ns2 userspace_pm_add_sf $ns2 10.0.3.2 20 @@ -3441,7 +4057,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3449,8 +4065,8 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - speed=5 \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 userspace_pm_add_addr $ns1 10.0.2.1 10 @@ -3467,7 +4083,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi } @@ -3476,12 +4092,12 @@ endpoint_tests() # subflow_rebuild_header is needed to support the implicit flag # userspace pm type prevents add_addr if reset "implicit EP" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - speed=slow \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! 
wait_mpj $ns1 @@ -3497,34 +4113,196 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi - if reset "delete and re-add" && - mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then - pm_nl_set_limits $ns1 1 1 - pm_nl_set_limits $ns2 1 1 + if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + start_events + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 0 3 + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - test_linkfail=4 speed=20 \ - run_tests $ns1 $ns2 10.0.1.1 & + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 pm_nl_check_endpoint "creation" \ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2 - chk_subflow_nr "before delete" 2 + chk_subflow_nr "before delete id 2" 2 chk_mptcp_info subflows 1 subflows 1 pm_nl_del_endpoint $ns2 2 10.0.2.2 sleep 0.5 - chk_subflow_nr "after delete" 1 + chk_subflow_nr "after delete id 2" 1 chk_mptcp_info subflows 0 subflows 0 - pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow wait_mpj $ns2 - chk_subflow_nr "after re-add" 2 + chk_subflow_nr "after re-add id 2" 2 chk_mptcp_info subflows 1 subflows 1 - mptcp_lib_kill_wait $tests_pid + + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_attempt_fail $ns2 + chk_subflow_nr "after new reject" 2 + chk_mptcp_info subflows 1 subflows 1 + + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_del_endpoint $ns2 3 10.0.3.2 + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after no reject" 3 + chk_mptcp_info subflows 2 subflows 2 + + local i 
+ for i in $(seq 3); do + pm_nl_del_endpoint $ns2 1 10.0.1.2 + sleep 0.5 + chk_subflow_nr "after delete id 0 ($i)" 2 + chk_mptcp_info subflows 2 subflows 2 # only decr for additional sf + + pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow + wait_mpj $ns2 + chk_subflow_nr "after re-add id 0 ($i)" 3 + chk_mptcp_info subflows 3 subflows 3 + done + + mptcp_lib_kill_group_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 4 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 6 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 5 # one has been closed before estab + + join_syn_tx=7 \ + chk_join_nr 6 6 6 + chk_rm_nr 4 4 + fi + + # remove and re-add + if reset_with_events "delete re-add signal" && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 + pm_nl_set_limits $ns1 0 3 + pm_nl_set_limits $ns2 3 3 + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal + { timeout_test=120 test_linkfail=128 speed=5 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! 
+ + wait_mpj $ns2 + pm_nl_check_endpoint "creation" \ + $ns1 10.0.2.1 id 1 flags signal + chk_subflow_nr "before delete" 2 + chk_mptcp_info subflows 1 subflows 1 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 1 + + pm_nl_del_endpoint $ns1 1 10.0.2.1 + pm_nl_del_endpoint $ns1 2 224.0.0.1 + sleep 0.5 + chk_subflow_nr "after delete" 1 + chk_mptcp_info subflows 0 subflows 0 + chk_mptcp_info add_addr_signal 0 add_addr_accepted 0 + + pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add" 3 + chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + + pm_nl_del_endpoint $ns1 42 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 99 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 + + pm_nl_del_endpoint $ns1 99 10.0.1.1 + sleep 0.5 + chk_subflow_nr "after re-delete ID 0" 2 + chk_mptcp_info subflows 2 subflows 2 + chk_mptcp_info add_addr_signal 2 add_addr_accepted 2 + + pm_nl_add_endpoint $ns1 10.0.1.1 id 88 flags signal + wait_mpj $ns2 + chk_subflow_nr "after re-re-add ID 0" 3 + chk_mptcp_info subflows 3 subflows 3 + chk_mptcp_info add_addr_signal 3 add_addr_accepted 2 + mptcp_lib_kill_group_wait $tests_pid + + kill_events_pids + chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns1 MPTCP_LIB_EVENT_ANNOUNCED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_REMOVED 0 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns1 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + chk_evt_nr ns2 MPTCP_LIB_EVENT_CREATED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ESTABLISHED 1 + chk_evt_nr ns2 MPTCP_LIB_EVENT_ANNOUNCED 6 + 
chk_evt_nr ns2 MPTCP_LIB_EVENT_REMOVED 4 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_ESTABLISHED 5 + chk_evt_nr ns2 MPTCP_LIB_EVENT_SUB_CLOSED 3 + + join_connect_err=1 \ + chk_join_nr 5 5 5 + chk_add_nr 6 6 + chk_rm_nr 4 3 invert + fi + + # flush and re-add + if reset_with_tcp_filter "flush re-add" ns2 10.0.3.2 REJECT OUTPUT && + continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + pm_nl_set_limits $ns1 0 2 + pm_nl_set_limits $ns2 1 2 + # broadcast IP: no packet for this address will be received on ns1 + pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + { timeout_test=120 test_linkfail=128 speed=20 \ + run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null + local tests_pid=$! + + wait_attempt_fail $ns2 + chk_subflow_nr "before flush" 1 + chk_mptcp_info subflows 0 subflows 0 + + pm_nl_flush_endpoint $ns2 + pm_nl_flush_endpoint $ns1 + wait_rm_addr $ns2 0 + ip netns exec "${ns2}" ${iptables} -D OUTPUT -s "10.0.3.2" -p tcp -j REJECT + pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow + wait_mpj $ns2 + pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal + wait_mpj $ns2 + mptcp_lib_kill_group_wait $tests_pid + + join_syn_tx=3 join_connect_err=1 \ + chk_join_nr 2 2 2 + chk_add_nr 2 2 + chk_rm_nr 1 0 invert fi } @@ -3559,6 +4337,7 @@ all_tests_sorted=( f@subflows_tests e@subflows_error_tests s@signal_address_tests + L@laminar_endp_tests l@link_failure_tests t@add_addr_timeout_tests r@remove_tests @@ -3568,6 +4347,7 @@ all_tests_sorted=( M@mixed_tests b@backup_tests p@add_addr_ports_tests + B@bind_tests k@syncookies_tests S@checksum_tests d@deny_join_id0_tests @@ -3627,9 +4407,11 @@ if [ ${#tests[@]} -eq 0 ]; then tests=("${all_tests_names[@]}") fi +mptcp_lib_subtests_last_ts_reset for subtests in "${tests[@]}"; do "${subtests}" done +append_prev_results if [ ${ret} -ne 0 ]; then echo @@ -3640,7 +4422,6 @@ if [ ${ret} -ne 0 ]; then echo fi -append_prev_results mptcp_lib_result_print_all_tap exit $ret diff --git 
a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index 438280e68434..5fea7e7df628 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -2,7 +2,6 @@ # SPDX-License-Identifier: GPL-2.0 . "$(dirname "${0}")/../lib.sh" -. "$(dirname "${0}")/../net_helper.sh" readonly KSFT_PASS=0 readonly KSFT_FAIL=1 @@ -12,10 +11,14 @@ readonly KSFT_SKIP=4 readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}" # These variables are used in some selftests, read-only +declare -rx MPTCP_LIB_EVENT_CREATED=1 # MPTCP_EVENT_CREATED +declare -rx MPTCP_LIB_EVENT_ESTABLISHED=2 # MPTCP_EVENT_ESTABLISHED +declare -rx MPTCP_LIB_EVENT_CLOSED=3 # MPTCP_EVENT_CLOSED declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED +declare -rx MPTCP_LIB_EVENT_SUB_PRIORITY=13 # MPTCP_EVENT_SUB_PRIORITY declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED @@ -25,6 +28,7 @@ declare -rx MPTCP_LIB_AF_INET6=10 MPTCP_LIB_SUBTESTS=() MPTCP_LIB_SUBTESTS_DUPLICATED=0 MPTCP_LIB_SUBTEST_FLAKY=0 +MPTCP_LIB_SUBTESTS_LAST_TS_MS= MPTCP_LIB_TEST_COUNTER=0 MPTCP_LIB_TEST_FORMAT="%02u %-50s" MPTCP_LIB_IP_MPTCP=0 @@ -102,6 +106,36 @@ mptcp_lib_pr_info() { mptcp_lib_print_info "INFO: ${*}" } +mptcp_lib_pr_nstat() { + local ns="${1}" + local hist="/tmp/${ns}.out" + + if [ -f "${hist}" ]; then + awk '$2 != 0 { print " "$0 }' "${hist}" + else + ip netns exec "${ns}" nstat -as | grep Tcp + fi +} + +# $1-2: listener/connector ns ; $3 port +mptcp_lib_pr_err_stats() { + local lns="${1}" + local cns="${2}" + local port="${3}" + + echo -en "${MPTCP_LIB_COLOR_RED}" + { + printf "\nnetns %s (listener) socket 
stat for %d:\n" "${lns}" "${port}" + ip netns exec "${lns}" ss -Menitam -o "sport = :${port}" + mptcp_lib_pr_nstat "${lns}" + + printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}" + ip netns exec "${cns}" ss -Menitam -o "dport = :${port}" + [ "${lns}" != "${cns}" ] && mptcp_lib_pr_nstat "${cns}" + } 1>&2 + echo -en "${MPTCP_LIB_COLOR_RESET}" +} + # SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all # features using the last version of the kernel and the selftests to make sure # a test is not being skipped by mistake. @@ -201,6 +235,11 @@ mptcp_lib_kversion_ge() { mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}" } +mptcp_lib_subtests_last_ts_reset() { + MPTCP_LIB_SUBTESTS_LAST_TS_MS="$(date +%s%3N)" +} +mptcp_lib_subtests_last_ts_reset + __mptcp_lib_result_check_duplicated() { local subtest @@ -215,13 +254,22 @@ __mptcp_lib_result_check_duplicated() { __mptcp_lib_result_add() { local result="${1}" + local time="time=" + local ts_prev_ms shift local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1)) __mptcp_lib_result_check_duplicated "${*}" - MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}") + # not to add two '#' + [[ "${*}" != *"#"* ]] && time="# ${time}" + + ts_prev_ms="${MPTCP_LIB_SUBTESTS_LAST_TS_MS}" + mptcp_lib_subtests_last_ts_reset + time+="$((MPTCP_LIB_SUBTESTS_LAST_TS_MS - ts_prev_ms))ms" + + MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*} ${time}") } # $1: test name @@ -291,12 +339,28 @@ mptcp_lib_result_print_all_tap() { # get the value of keyword $1 in the line marked by keyword $2 mptcp_lib_get_info_value() { - grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + grep "${2}" 2>/dev/null | + sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q' + # the ';q' at the end limits to the first matched entry. 
} # $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]] mptcp_lib_evts_get_info() { - grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1}," + grep "${4:-}" "${2}" 2>/dev/null | + mptcp_lib_get_info_value "${1}" "^type:${3:-1}," +} + +mptcp_lib_wait_timeout() { + local timeout_test="${1}" + local listener_ns="${2}" + local connector_ns="${3}" + local port="${4}" + shift 4 # rest are PIDs + + sleep "${timeout_test}" + mptcp_lib_print_err "timeout" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" + kill "${@}" 2>/dev/null } # $1: PID @@ -308,19 +372,62 @@ mptcp_lib_kill_wait() { wait "${1}" 2>/dev/null } +# $1: PID +mptcp_lib_pid_list_children() { + local curr="${1}" + # evoke 'ps' only once + local pids="${2:-"$(ps o pid,ppid)"}" + + echo "${curr}" + + local pid + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do + mptcp_lib_pid_list_children "${pid}" "${pids}" + done +} + +# $1: PID +mptcp_lib_kill_group_wait() { + # Some users might not have procps-ng: cannot use "kill -- -PID" + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null + wait "${1}" 2>/dev/null +} + # $1: IP address mptcp_lib_is_v6() { [ -z "${1##*:*}" ] } +mptcp_lib_nstat_init() { + local ns="${1}" + + rm -f "/tmp/${ns}."{nstat,out} + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -n +} + +mptcp_lib_nstat_get() { + local ns="${1}" + + # filter out non-*TCP stats, and the rate (last column) + NSTAT_HISTORY="/tmp/${ns}.nstat" ip netns exec "${ns}" nstat -sz | + grep -o ".*Tcp\S\+\s\+[0-9]\+" > "/tmp/${ns}.out" +} + # $1: ns, $2: MIB counter +# Get the counter from the history (mptcp_lib_nstat_{init,get}()) if available. +# If not, get the counter from nstat ignoring any history. 
mptcp_lib_get_counter() { local ns="${1}" local counter="${2}" + local hist="/tmp/${ns}.out" local count - count=$(ip netns exec "${ns}" nstat -asz "${counter}" | - awk 'NR==1 {next} {print $2}') + if [[ -s "${hist}" && "${counter}" == *"Tcp"* ]]; then + count=$(awk "/^${counter} / {print \$2; exit}" "${hist}") + else + count=$(ip netns exec "${ns}" nstat -asz "${counter}" | + awk 'NR==1 {next} {print $2}') + fi if [ -z "${count}" ]; then mptcp_lib_fail_if_expected_feature "${counter} counter" return 1 @@ -342,7 +449,7 @@ mptcp_lib_make_file() { mptcp_lib_print_file_err() { ls -l "${1}" 1>&2 echo "Trailing bytes are: " - tail -c 27 "${1}" + tail -c 32 "${1}" | od -x | head -n2 } # $1: input file ; $2: output file ; $3: what kind of file @@ -436,8 +543,6 @@ mptcp_lib_ns_init() { local netns for netns in "${@}"; do ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0 - ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0 done } diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 926b0be87c99..286164f7246e 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -159,13 +159,22 @@ static const char *getxinfo_strerr(int err) } static void xgetaddrinfo(const char *node, const char *service, - const struct addrinfo *hints, + struct addrinfo *hints, struct addrinfo **res) { - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { - const char *errstr = getxinfo_strerr(err); + const char *errstr; + + if (err == EAI_SOCKTYPE) { + hints->ai_protocol = IPPROTO_TCP; + goto again; + } + + errstr = getxinfo_strerr(err); fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n", node ? node : "", service ? 
service : "", errstr); @@ -178,7 +187,7 @@ static int sock_listen_mptcp(const char * const listenaddr, { int sock = -1; struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, .ai_flags = AI_PASSIVE | AI_NUMERICHOST }; @@ -223,7 +232,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, const char * const port, int proto) { struct addrinfo hints = { - .ai_protocol = IPPROTO_TCP, + .ai_protocol = IPPROTO_MPTCP, .ai_socktype = SOCK_STREAM, }; struct addrinfo *a, *addr; @@ -658,22 +667,26 @@ static void process_one_client(int fd, int pipefd) do_getsockopts(&s, fd, ret, ret2); if (s.mptcpi_rcv_delta != (uint64_t)ret + 1) - xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret); + xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64 ", diff %" PRId64, + s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - (ret + 1)); /* be nice when running on top of older kernel */ if (s.pkt_stats_avail) { if (s.last_sample.mptcpi_bytes_sent != ret2) - xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64, + xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, s.last_sample.mptcpi_bytes_sent, ret2, s.last_sample.mptcpi_bytes_sent - ret2); if (s.last_sample.mptcpi_bytes_received != ret) - xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64, + xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, s.last_sample.mptcpi_bytes_received, ret, s.last_sample.mptcpi_bytes_received - ret); if (s.last_sample.mptcpi_bytes_acked != ret) - xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64, - s.last_sample.mptcpi_bytes_acked, ret2, - s.last_sample.mptcpi_bytes_acked - ret2); + xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64 + ", diff %" PRId64, + s.last_sample.mptcpi_bytes_acked, ret, + s.last_sample.mptcpi_bytes_acked - ret); } close(fd); @@ -713,6 +726,7 @@ static int server(int pipefd) 
process_one_client(r, pipefd); + close(fd); return 0; } @@ -838,8 +852,12 @@ int main(int argc, char *argv[]) die_perror("pipe"); s = xfork(); - if (s == 0) - return server(pipefds[1]); + if (s == 0) { + close(pipefds[0]); + ret = server(pipefds[1]); + close(pipefds[1]); + return ret; + } close(pipefds[1]); diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index 68899a303a1a..ab8bce06b262 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -95,7 +95,7 @@ init() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}" @@ -169,34 +169,44 @@ do_transfer() cmsg+=",TCPINQ" fi - timeout ${timeout_test} \ - ip netns exec ${listener_ns} \ - $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ - ${local_addr} < "$sin" > "$sout" & + mptcp_lib_nstat_init "${listener_ns}" + mptcp_lib_nstat_init "${connector_ns}" + + ip netns exec ${listener_ns} \ + $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \ + ${local_addr} < "$sin" > "$sout" & local spid=$! - sleep 1 + mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${connector_ns} \ - $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ - $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} \ + $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \ + $connect_addr < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${listener_ns}" \ + "${connector_ns}" "${port}" "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? 
+ if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + + mptcp_lib_nstat_get "${listener_ns}" + mptcp_lib_nstat_get "${connector_ns}" + print_title "Transfer ${ip:2}" - if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ] || [ ${timeout_pid} -ne 0 ]; then mptcp_lib_pr_fail "client exit code $retc, server $rets" - echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port" - - echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 - ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port" + mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" mptcp_lib_result_fail "transfer ${ip}" @@ -349,6 +359,7 @@ init make_file "$cin" "client" 1 make_file "$sin" "server" 1 trap cleanup EXIT +mptcp_lib_subtests_last_ts_reset run_tests $ns1 $ns2 10.0.1.1 run_tests $ns1 $ns2 dead:beef:1::1 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2757378b1b13..ec6a87588191 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -32,7 +32,7 @@ ns1="" err=$(mktemp) # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "${err}" @@ -70,8 +70,9 @@ format_endpoints() { mptcp_lib_pm_nl_format_endpoints "${@}" } +# This function is invoked indirectly +#shellcheck disable=SC2317,SC2329 get_endpoint() { - # shellcheck disable=SC2317 # invoked indirectly mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}" } @@ -137,6 +138,8 @@ check() fi } +mptcp_lib_subtests_last_ts_reset + check "show_endpoints" "" "defaults addr list" default_limits="$(get_limits)" @@ -196,6 +199,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 
8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 7ad5a59adff2..65b374232ff5 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -2,6 +2,7 @@ #include <errno.h> #include <error.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -19,12 +20,6 @@ #include "linux/mptcp.h" -#ifndef MPTCP_PM_NAME -#define MPTCP_PM_NAME "mptcp_pm" -#endif -#ifndef MPTCP_PM_EVENTS -#define MPTCP_PM_EVENTS "mptcp_pm_events" -#endif #ifndef IPPROTO_MPTCP #define IPPROTO_MPTCP 262 #endif @@ -116,9 +111,11 @@ static int capture_events(int fd, int event_group) if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group, sizeof(event_group)) < 0) - error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group"); + error(1, errno, "could not join the " MPTCP_PM_EV_GRP_NAME " mcast group"); do { + bool server_side = false; + FD_ZERO(&rfds); FD_SET(fd, &rfds); res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) + @@ -193,11 +190,22 @@ static int capture_events(int fd, int event_group) else if (attrs->rta_type == MPTCP_ATTR_ERROR) fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs)); else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE) - fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs)); + server_side = !!*(__u8 *)RTA_DATA(attrs); + else if (attrs->rta_type == MPTCP_ATTR_FLAGS) { + __u16 flags = *(__u16 *)RTA_DATA(attrs); + + /* only print when present, easier */ + if (flags & MPTCP_PM_EV_FLAG_DENY_JOIN_ID0) + fprintf(stderr, ",deny_join_id0:1"); + if (flags & MPTCP_PM_EV_FLAG_SERVER_SIDE) + server_side = true; + } attrs = RTA_NEXT(attrs, msg_len); } } + if (server_side) + fprintf(stderr, ",server_side:1"); fprintf(stderr, "\n"); } while (1); @@ -288,7 +296,7 @@ static int 
genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family, if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID) *events_mcast_grp = *(__u32 *)RTA_DATA(grp); else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME && - !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS)) + !strcmp(RTA_DATA(grp), MPTCP_PM_EV_GRP_NAME)) got_events_grp = 1; grp = RTA_NEXT(grp, grp_len); @@ -822,6 +830,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[]) flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; else if (!strcmp(tok, "signal")) flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "laminar")) + flags |= MPTCP_PM_ADDR_FLAG_LAMINAR; else if (!strcmp(tok, "backup")) flags |= MPTCP_PM_ADDR_FLAG_BACKUP; else if (!strcmp(tok, "fullmesh")) @@ -1010,6 +1020,13 @@ static void print_addr(struct rtattr *attrs, int len) printf(","); } + if (flags & MPTCP_PM_ADDR_FLAG_LAMINAR) { + printf("laminar"); + flags &= ~MPTCP_PM_ADDR_FLAG_LAMINAR; + if (flags) + printf(","); + } + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { printf("backup"); flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index f74e1c3c126d..806aaa7d2d61 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -28,14 +28,14 @@ size=0 usage() { echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]" - echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-b: bail out after first error, otherwise runs all testcases" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" echo -e "\t-d: debug this script" echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'" } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { rm -f "$cout" "$sout" @@ -155,50 +155,61 @@ do_transfer() sleep 1 fi - timeout ${timeout_test} \ - ip netns exec ${ns3} \ - ./mptcp_connect -jt ${timeout_poll} -l -p $port -T 
$max_time \ - 0.0.0.0 < "$sin" > "$sout" & + mptcp_lib_nstat_init "${ns3}" + mptcp_lib_nstat_init "${ns1}" + + ip netns exec ${ns3} \ + ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \ + 0.0.0.0 < "$sin" > "$sout" & local spid=$! mptcp_lib_wait_local_port_listen "${ns3}" "${port}" - timeout ${timeout_test} \ - ip netns exec ${ns1} \ - ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ - 10.0.3.3 < "$cin" > "$cout" & + ip netns exec ${ns1} \ + ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \ + 10.0.3.3 < "$cin" > "$cout" & local cpid=$! + mptcp_lib_wait_timeout "${timeout_test}" "${ns3}" "${ns1}" "${port}" \ + "${cpid}" "${spid}" & + local timeout_pid=$! + wait $cpid local retc=$? wait $spid local rets=$? + if kill -0 $timeout_pid; then + # Finished before the timeout: kill the background job + mptcp_lib_kill_group_wait $timeout_pid + timeout_pid=0 + fi + if $capture; then sleep 1 kill ${cappid_listener} kill ${cappid_connector} fi + mptcp_lib_nstat_get "${ns3}" + mptcp_lib_nstat_get "${ns1}" + cmp $sin $cout > /dev/null 2>&1 local cmps=$? cmp $cin $sout > /dev/null 2>&1 local cmpc=$? 
- printf "%-16s" " max $max_time " - if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ - [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && + [ $timeout_pid -eq 0 ]; then + printf "%-16s" " max $max_time " mptcp_lib_pr_ok cat "$capout" return 0 fi - mptcp_lib_pr_fail - echo "client exit code $retc, server $rets" 1>&2 - echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 - ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" - echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 - ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + mptcp_lib_pr_fail "client exit code $retc, server $rets" + mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" ls -l $sin $cout ls -l $cin $sout @@ -286,6 +297,7 @@ while getopts "bcdhi" option;do done setup +mptcp_lib_subtests_last_ts_reset run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 25 "balanced bwidth with unbalanced delay" diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 9cb05978269d..e9ae1806ab07 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -94,7 +94,7 @@ test_fail() } # This function is used in the cleanup trap -#shellcheck disable=SC2317 +#shellcheck disable=SC2317,SC2329 cleanup() { print_title "Cleanup" @@ -117,7 +117,36 @@ cleanup() trap cleanup EXIT # Create and configure network namespaces for testing +print_title "Init" mptcp_lib_ns_init ns1 ns2 + +# check path_manager and pm_type sysctl mapping +if [ -f /proc/sys/net/mptcp/path_manager ]; then + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null + pm_type="$(ip 
netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type after error: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0 + pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)" + if [ "${pm_name}" != "kernel" ]; then + test_fail "unexpected path-manager: ${pm_name}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi +fi + for i in "$ns1" "$ns2" ;do ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done @@ -150,8 +179,8 @@ mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid server_evts=$(mktemp) mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 +mptcp_lib_subtests_last_ts_reset -print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass @@ -172,6 +201,9 @@ make_connection() is_v6="v4" fi + # set this on the client side only: will not affect the rest + ip netns exec "$ns2" sysctl -q net.mptcp.allow_join_initial_addr_port=0 + :>"$client_evts" :>"$server_evts" @@ -179,7 +211,8 @@ make_connection() ip netns exec "$ns1" \ ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 & local server_pid=$! 
- sleep 0.5 + + mptcp_lib_wait_local_port_listen "${ns1}" "${port}" # Run the client, transfer $file and stay connected to the server # to conduct tests @@ -194,23 +227,28 @@ make_connection() local client_token local client_port local client_serverside + local client_nojoin local server_token local server_serverside + local server_nojoin client_token=$(mptcp_lib_evts_get_info token "$client_evts") client_port=$(mptcp_lib_evts_get_info sport "$client_evts") client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts") + client_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$client_evts") server_token=$(mptcp_lib_evts_get_info token "$server_evts") server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts") + server_nojoin=$(mptcp_lib_evts_get_info deny_join_id0 "$server_evts") print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1" - if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] && - [ "$server_serverside" = 1 ] + if [ "${client_token}" != "" ] && [ "${server_token}" != "" ] && + [ "${client_serverside:-0}" = 0 ] && [ "${server_serverside:-0}" = 1 ] && + [ "${client_nojoin:-0}" = 0 ] && [ "${server_nojoin:-0}" = 1 ] then test_pass print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}" else - test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})" + test_fail "Expected tokens (c:${client_token} - s:${server_token}), server (c:${client_serverside} - s:${server_serverside}), nojoin (c:${client_nojoin} - s:${server_nojoin})" mptcp_lib_result_print_all_tap exit ${KSFT_FAIL} fi diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c index 7ea5fb28c93d..1d5d3c4e7e87 100644 --- a/tools/testing/selftests/net/msg_zerocopy.c +++ b/tools/testing/selftests/net/msg_zerocopy.c @@ -77,6 +77,7 @@ static int cfg_cork; static bool cfg_cork_mixed; static int 
cfg_cpu = -1; /* default: pin to last cpu */ +static int cfg_expect_zerocopy = -1; static int cfg_family = PF_UNSPEC; static int cfg_ifindex = 1; static int cfg_payload_len; @@ -92,9 +93,9 @@ static socklen_t cfg_alen; static struct sockaddr_storage cfg_dst_addr; static struct sockaddr_storage cfg_src_addr; +static int exitcode; static char payload[IP_MAXPACKET]; static long packets, bytes, completions, expected_completions; -static int zerocopied = -1; static uint32_t next_completion; static uint32_t sends_since_notify; @@ -444,11 +445,13 @@ static bool do_recv_completion(int fd, int domain) next_completion = hi + 1; zerocopy = !(serr->ee_code & SO_EE_CODE_ZEROCOPY_COPIED); - if (zerocopied == -1) - zerocopied = zerocopy; - else if (zerocopied != zerocopy) { - fprintf(stderr, "serr: inconsistent\n"); - zerocopied = zerocopy; + if (cfg_expect_zerocopy != -1 && + cfg_expect_zerocopy != zerocopy) { + fprintf(stderr, "serr: ee_code: %u != expected %u\n", + zerocopy, cfg_expect_zerocopy); + exitcode = 1; + /* suppress repeated messages */ + cfg_expect_zerocopy = zerocopy; } if (cfg_verbose >= 2) @@ -571,7 +574,7 @@ static void do_tx(int domain, int type, int protocol) fprintf(stderr, "tx=%lu (%lu MB) txc=%lu zc=%c\n", packets, bytes >> 20, completions, - zerocopied == 1 ? 'y' : 'n'); + cfg_zerocopy && cfg_expect_zerocopy == 1 ? 
'y' : 'n'); } static int do_setup_rx(int domain, int type, int protocol) @@ -715,7 +718,7 @@ static void parse_opts(int argc, char **argv) cfg_payload_len = max_payload_len; - while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) { + while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vzZ:")) != -1) { switch (c) { case '4': if (cfg_family != PF_UNSPEC) @@ -770,6 +773,9 @@ static void parse_opts(int argc, char **argv) case 'z': cfg_zerocopy = true; break; + case 'Z': + cfg_expect_zerocopy = !!atoi(optarg); + break; } } @@ -817,5 +823,5 @@ int main(int argc, char **argv) else error(1, 0, "unknown cfg_test %s", cfg_test); - return 0; + return exitcode; } diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh index 89c22f5320e0..28178a38a4e7 100755 --- a/tools/testing/selftests/net/msg_zerocopy.sh +++ b/tools/testing/selftests/net/msg_zerocopy.sh @@ -6,6 +6,7 @@ set -e readonly DEV="veth0" +readonly DUMMY_DEV="dummy0" readonly DEV_MTU=65535 readonly BIN="./msg_zerocopy" @@ -14,21 +15,25 @@ readonly NSPREFIX="ns-${RAND}" readonly NS1="${NSPREFIX}1" readonly NS2="${NSPREFIX}2" -readonly SADDR4='192.168.1.1' -readonly DADDR4='192.168.1.2' -readonly SADDR6='fd::1' -readonly DADDR6='fd::2' +readonly LPREFIX4='192.168.1' +readonly RPREFIX4='192.168.2' +readonly LPREFIX6='fd' +readonly RPREFIX6='fc' + readonly path_sysctl_mem="net.core.optmem_max" # No arguments: automated test if [[ "$#" -eq "0" ]]; then - $0 4 tcp -t 1 - $0 6 tcp -t 1 - $0 4 udp -t 1 - $0 6 udp -t 1 - echo "OK. All tests passed" - exit 0 + ret=0 + + $0 4 tcp -t 1 || ret=1 + $0 6 tcp -t 1 || ret=1 + $0 4 udp -t 1 || ret=1 + $0 6 udp -t 1 || ret=1 + + [[ "$ret" == "0" ]] && echo "OK. 
All tests passed" + exit $ret fi # Argument parsing @@ -45,11 +50,18 @@ readonly EXTRA_ARGS="$@" # Argument parsing: configure addresses if [[ "${IP}" == "4" ]]; then - readonly SADDR="${SADDR4}" - readonly DADDR="${DADDR4}" + readonly SADDR="${LPREFIX4}.1" + readonly DADDR="${LPREFIX4}.2" + readonly DUMMY_ADDR="${RPREFIX4}.1" + readonly DADDR_TXONLY="${RPREFIX4}.2" + readonly MASK="24" elif [[ "${IP}" == "6" ]]; then - readonly SADDR="${SADDR6}" - readonly DADDR="${DADDR6}" + readonly SADDR="${LPREFIX6}::1" + readonly DADDR="${LPREFIX6}::2" + readonly DUMMY_ADDR="${RPREFIX6}::1" + readonly DADDR_TXONLY="${RPREFIX6}::2" + readonly MASK="64" + readonly NODAD="nodad" else echo "Invalid IP version ${IP}" exit 1 @@ -89,33 +101,61 @@ ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000" ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}" +ip link add "${DUMMY_DEV}" mtu "${DEV_MTU}" netns "${NS2}" type dummy + # Bring the devices up ip -netns "${NS1}" link set "${DEV}" up ip -netns "${NS2}" link set "${DEV}" up +ip -netns "${NS2}" link set "${DUMMY_DEV}" up # Set fixed MAC addresses on the devices ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 # Add fixed IP addresses to the devices -ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" -ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" -ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad -ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad +ip -netns "${NS1}" addr add "${SADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DADDR}/${MASK}" dev "${DEV}" ${NODAD} +ip -netns "${NS2}" addr add "${DUMMY_ADDR}/${MASK}" dev "${DUMMY_DEV}" ${NODAD} + +ip -netns "${NS1}" route add default via "${DADDR}" dev "${DEV}" +ip -netns "${NS2}" route add default via "${DADDR_TXONLY}" dev "${DUMMY_DEV}" + +ip netns exec "${NS2}" sysctl -wq 
net.ipv4.ip_forward=1 +ip netns exec "${NS2}" sysctl -wq net.ipv6.conf.all.forwarding=1 # Optionally disable sg or csum offload to test edge cases # ip netns exec "${NS1}" ethtool -K "${DEV}" sg off +ret=0 + do_test() { local readonly ARGS="$1" - echo "ipv${IP} ${TXMODE} ${ARGS}" - ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & + # tx-rx test + # packets queued to a local socket are copied, + # sender notification has SO_EE_CODE_ZEROCOPY_COPIED. + + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-rx\n" + ip netns exec "${NS2}" "${BIN}" "-${IP}" -i "${DEV}" -t 2 -C 2 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} -r "${RXMODE}" & sleep 0.2 - ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR}" ${ARGS} "${TXMODE}" -Z 0 || ret=1 wait + + # next test is unconnected tx to dummy0, cannot exercise with tcp + [[ "${TXMODE}" == "tcp" ]] && return + + # tx-only test: send out dummy0 + # packets leaving the host are not copied, + # sender notification does not have SO_EE_CODE_ZEROCOPY_COPIED. 
+ + echo -e "\nipv${IP} ${TXMODE} ${ARGS} tx-only\n" + ip netns exec "${NS1}" "${BIN}" "-${IP}" -i "${DEV}" -t 1 -C 3 \ + -S "${SADDR}" -D "${DADDR_TXONLY}" ${ARGS} "${TXMODE}" -Z 1 || ret=1 } do_test "${EXTRA_ARGS}" do_test "-z ${EXTRA_ARGS}" -echo ok + +[[ "$ret" == "0" ]] && echo "OK" diff --git a/tools/testing/selftests/net/nat6to4.sh b/tools/testing/selftests/net/nat6to4.sh new file mode 100755 index 000000000000..0ee859b622a4 --- /dev/null +++ b/tools/testing/selftests/net/nat6to4.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +NS="ns-peer-$(mktemp -u XXXXXX)" + +ip netns add "${NS}" +ip -netns "${NS}" link set lo up +ip -netns "${NS}" route add default via 127.0.0.2 dev lo + +tc -n "${NS}" qdisc add dev lo ingress +tc -n "${NS}" filter add dev lo ingress prio 4 protocol ip \ + bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action + +ip netns exec "${NS}" \ + bash -c 'echo 012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789abc | socat - UDP4-DATAGRAM:224.1.0.1:6666,ip-multicast-loop=1' diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh deleted file mode 100644 index 6596fe03c77f..000000000000 --- a/tools/testing/selftests/net/net_helper.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Helper functions - -wait_local_port_listen() -{ - local listener_ns="${1}" - local port="${2}" - local protocol="${3}" - local pattern - local i - - pattern=":$(printf "%04X" "${port}") " - - # for tcp protocol additionally check the socket state - [ ${protocol} = "tcp" ] && pattern="${pattern}0A" - for i in $(seq 10); do - if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \ - /proc/net/"${protocol}"* | grep -q "${pattern}"; then - break - fi - sleep 0.1 - done -} diff --git a/tools/testing/selftests/net/netdev-l2addr.sh b/tools/testing/selftests/net/netdev-l2addr.sh new 
file mode 100755 index 000000000000..18509da293e5 --- /dev/null +++ b/tools/testing/selftests/net/netdev-l2addr.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +set -o pipefail + +NSIM_ADDR=2025 +TEST_ADDR="d0:be:d0:be:d0:00" + +RET_CODE=0 + +cleanup() { + cleanup_netdevsim "$NSIM_ADDR" + cleanup_ns "$NS" +} + +trap cleanup EXIT + +fail() { + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + RET_CODE=1 +} + +get_addr() +{ + local type="$1" + local dev="$2" + local ns="$3" + + ip -j -n "$ns" link show dev "$dev" | jq -er ".[0].$type" +} + +setup_ns NS + +nsim=$(create_netdevsim $NSIM_ADDR "$NS") + +get_addr address "$nsim" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim" "$NS" >/dev/null || fail "Couldn't get brd addr" +get_addr permaddr "$nsim" "$NS" >/dev/null && fail "Found perm_addr without setting it" + +ip -n "$NS" link set dev "$nsim" address "$TEST_ADDR" +ip -n "$NS" link set dev "$nsim" brd "$TEST_ADDR" + +[[ "$(get_addr address "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set ether addr" +[[ "$(get_addr broadcast "$nsim" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't set brd addr" + +if create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "FF:FF:FF:FF:FF:FF" 2>/dev/null; then + fail "Created netdevsim with broadcast permaddr" +fi + +nsim_port=$(create_netdevsim_port "$NSIM_ADDR" "$NS" 2 "$TEST_ADDR") + +get_addr address "$nsim_port" "$NS" >/dev/null || fail "Couldn't get ether addr" +get_addr broadcast "$nsim_port" "$NS" >/dev/null || fail "Couldn't get brd addr" +[[ "$(get_addr permaddr "$nsim_port" "$NS")" == "$TEST_ADDR" ]] || fail "Couldn't get permaddr" + +cleanup_netdevsim "$NSIM_ADDR" "$NS" + +exit $RET_CODE diff --git a/tools/testing/selftests/net/netdevice.sh b/tools/testing/selftests/net/netdevice.sh index e3afcb424710..438f7b2acc5f 100755 --- a/tools/testing/selftests/net/netdevice.sh +++ b/tools/testing/selftests/net/netdevice.sh @@ -67,8 +67,12 @@ kci_net_setup() return 
$ksft_skip fi - # TODO what ipaddr to set ? DHCP ? - echo "SKIP: $netdev: set IP address" + if [ "$veth_created" ]; then + echo "XFAIL: $netdev: set IP address unsupported for veth*" + else + # TODO what ipaddr to set ? DHCP ? + echo "SKIP: $netdev: set IP address" + fi return $ksft_skip } @@ -86,7 +90,7 @@ kci_netdev_ethtool_test() ret=$? if [ $ret -ne 0 ];then if [ $ret -eq "$1" ];then - echo "SKIP: $netdev: ethtool $2 not supported" + echo "XFAIL: $netdev: ethtool $2 not supported" return $ksft_skip else echo "FAIL: $netdev: ethtool $2" @@ -124,11 +128,45 @@ kci_netdev_ethtool() return 1 fi echo "PASS: $netdev: ethtool list features" - #TODO for each non fixed features, try to turn them on/off + + while read -r FEATURE VALUE FIXED; do + [ "$FEATURE" != "Features" ] || continue # Skip "Features" + [ "$FIXED" != "[fixed]" ] || continue # Skip fixed features + feature="${FEATURE%:*}" + + ethtool --offload "$netdev" "$feature" off + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned off feature: $feature" + else + echo "FAIL: $netdev: Failed to turn off feature:" \ + "$feature" + fi + + ethtool --offload "$netdev" "$feature" on + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Turned on feature: $feature" + else + echo "FAIL: $netdev: Failed to turn on feature:" \ + "$feature" + fi + + #restore the feature to its initial state + ethtool --offload "$netdev" "$feature" "$VALUE" + if [ $? -eq 0 ]; then + echo "PASS: $netdev: Restore feature $feature" \ + "to initial state $VALUE" + else + echo "FAIL: $netdev: Failed to restore feature" \ + "$feature to initial state $VALUE" + fi + + done < "$TMP_ETHTOOL_FEATURES" + rm "$TMP_ETHTOOL_FEATURES" kci_netdev_ethtool_test 74 'dump' "ethtool -d $netdev" kci_netdev_ethtool_test 94 'stats' "ethtool -S $netdev" + return 0 } @@ -196,10 +234,24 @@ if [ ! 
-e "$TMP_LIST_NETDEV" ];then fi ip link show |grep '^[0-9]' | grep -oE '[[:space:]].*eth[0-9]*:|[[:space:]].*enp[0-9]s[0-9]:' | cut -d\ -f2 | cut -d: -f1> "$TMP_LIST_NETDEV" + +if [ ! -s "$TMP_LIST_NETDEV" ]; then + echo "No valid network device found, creating veth pair" + ip link add veth0 type veth peer name veth1 + echo "veth0" > "$TMP_LIST_NETDEV" + veth_created=1 +fi + while read netdev do kci_test_netdev "$netdev" done < "$TMP_LIST_NETDEV" +#clean up veth interface pair if it was created +if [ "$veth_created" ]; then + ip link delete veth0 + echo "Removed veth pair" +fi + rm "$TMP_LIST_NETDEV" exit 0 diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore index 0a64d6d0e29a..5d2be9a00627 100644 --- a/tools/testing/selftests/net/netfilter/.gitignore +++ b/tools/testing/selftests/net/netfilter/.gitignore @@ -2,5 +2,7 @@ audit_logread connect_close conntrack_dump_flush +conntrack_reverse_clash sctp_collision nf_queue +udpclash diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile index 47945b2b3f92..ee2d1a5254f8 100644 --- a/tools/testing/selftests/net/netfilter/Makefile +++ b/tools/testing/selftests/net/netfilter/Makefile @@ -6,36 +6,52 @@ HOSTPKG_CONFIG := pkg-config MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null) MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl) -TEST_PROGS := br_netfilter.sh bridge_brouter.sh -TEST_PROGS += conntrack_icmp_related.sh -TEST_PROGS += conntrack_ipip_mtu.sh -TEST_PROGS += conntrack_tcp_unreplied.sh -TEST_PROGS += conntrack_sctp_collision.sh -TEST_PROGS += conntrack_vrf.sh -TEST_PROGS += ipvs.sh -TEST_PROGS += nf_conntrack_packetdrill.sh -TEST_PROGS += nf_nat_edemux.sh -TEST_PROGS += nft_audit.sh -TEST_PROGS += nft_concat_range.sh -TEST_PROGS += nft_conntrack_helper.sh -TEST_PROGS += nft_fib.sh -TEST_PROGS += nft_flowtable.sh -TEST_PROGS += nft_meta.sh -TEST_PROGS += 
nft_nat.sh -TEST_PROGS += nft_nat_zones.sh -TEST_PROGS += nft_queue.sh -TEST_PROGS += nft_synproxy.sh -TEST_PROGS += nft_zones_many.sh -TEST_PROGS += rpath.sh -TEST_PROGS += xt_string.sh +TEST_PROGS := \ + br_netfilter.sh \ + br_netfilter_queue.sh \ + bridge_brouter.sh \ + conntrack_clash.sh \ + conntrack_dump_flush.sh \ + conntrack_icmp_related.sh \ + conntrack_ipip_mtu.sh \ + conntrack_resize.sh \ + conntrack_reverse_clash.sh \ + conntrack_sctp_collision.sh \ + conntrack_tcp_unreplied.sh \ + conntrack_vrf.sh \ + ipvs.sh \ + nf_conntrack_packetdrill.sh \ + nf_nat_edemux.sh \ + nft_audit.sh \ + nft_concat_range.sh \ + nft_conntrack_helper.sh \ + nft_fib.sh \ + nft_flowtable.sh \ + nft_interface_stress.sh \ + nft_meta.sh \ + nft_nat.sh \ + nft_nat_zones.sh \ + nft_queue.sh \ + nft_synproxy.sh \ + nft_tproxy_tcp.sh \ + nft_tproxy_udp.sh \ + nft_zones_many.sh \ + rpath.sh \ + vxlan_mtu_frag.sh \ + xt_string.sh \ +# end of TEST_PROGS TEST_PROGS_EXTENDED = nft_concat_range_perf.sh -TEST_GEN_PROGS = conntrack_dump_flush - -TEST_GEN_FILES = audit_logread -TEST_GEN_FILES += connect_close nf_queue -TEST_GEN_FILES += sctp_collision +TEST_GEN_FILES = \ + audit_logread \ + connect_close \ + conntrack_dump_flush \ + conntrack_reverse_clash \ + nf_queue \ + sctp_collision \ + udpclash \ +# end of TEST_GEN_FILES include ../../lib.mk @@ -44,9 +60,14 @@ $(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS) $(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS) $(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS) +$(OUTPUT)/udpclash: LDLIBS += -lpthread -TEST_FILES := lib.sh -TEST_FILES += packetdrill +TEST_FILES := \ + lib.sh \ + packetdrill \ +# end of TEST_FILES TEST_INCLUDES := \ - ../lib.sh + $(wildcard ../lib/sh/*.sh) \ + ../lib.sh \ +# end of TEST_INCLUDES diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh index c28379a965d8..011de8763094 100755 --- a/tools/testing/selftests/net/netfilter/br_netfilter.sh +++ 
b/tools/testing/selftests/net/netfilter/br_netfilter.sh @@ -13,6 +13,12 @@ source lib.sh checktool "nft --version" "run test without nft tool" +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + cleanup() { cleanup_all_ns } @@ -54,9 +60,6 @@ bcast_ping() done } -ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0 - if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip @@ -165,6 +168,7 @@ if [ "$t" -eq 0 ];then echo PASS: kernel not tainted else echo ERROR: kernel is tainted + dmesg ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh new file mode 100755 index 000000000000..4788641717d9 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/br_netfilter_queue.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +source lib.sh + +checktool "nft --version" "run test without nft tool" + +read t < /proc/sys/kernel/tainted +if [ "$t" -ne 0 ];then + echo SKIP: kernel is tainted + exit $ksft_skip +fi + +cleanup() { + cleanup_all_ns +} + +setup_ns c1 c2 c3 sender + +trap cleanup EXIT + +nf_queue_wait() +{ + grep -q "^ *$1 " "/proc/self/net/netfilter/nfnetlink_queue" +} + +port_add() { + ns="$1" + dev="$2" + a="$3" + + ip link add name "$dev" type veth peer name "$dev" netns "$ns" + + ip -net "$ns" addr add 192.168.1."$a"/24 dev "$dev" + ip -net "$ns" link set "$dev" up + + ip link set "$dev" master br0 + ip link set "$dev" up +} + +[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; } + +ip link add br0 type bridge +ip addr add 192.168.1.254/24 dev br0 + +port_add "$c1" "c1" 1 +port_add "$c2" "c2" 2 +port_add "$c3" "c3" 3 +port_add "$sender" "sender" 253 + +ip link set br0 up + +modprobe -q br_netfilter + +sysctl net.bridge.bridge-nf-call-iptables=1 || exit 1 + +ip netns exec 
"$sender" ping -I sender -c1 192.168.1.1 || exit 1 +ip netns exec "$sender" ping -I sender -c1 192.168.1.2 || exit 2 +ip netns exec "$sender" ping -I sender -c1 192.168.1.3 || exit 3 + +nft -f /dev/stdin <<EOF +table ip filter { + chain forward { + type filter hook forward priority 0; policy accept; + ct state new counter + ip protocol icmp counter queue num 0 bypass + } +} +EOF +./nf_queue -t 5 > /dev/null & + +busywait 5000 nf_queue_wait + +for i in $(seq 1 5); do conntrack -F > /dev/null 2> /dev/null; sleep 0.1 ; done & +ip netns exec "$sender" ping -I sender -f -c 50 -b 192.168.1.255 + +read t < /proc/sys/kernel/tainted +if [ "$t" -eq 0 ];then + echo PASS: kernel not tainted +else + echo ERROR: kernel is tainted + dmesg + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh index 2549b6590693..ea76f2bc2f59 100755 --- a/tools/testing/selftests/net/netfilter/bridge_brouter.sh +++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh @@ -22,8 +22,6 @@ trap cleanup EXIT setup_ns nsbr ns1 ns2 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0 -ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0 if ! 
ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then echo "SKIP: Can't create veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 63ef80ef47a4..12ce61fa15a8 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -7,63 +7,74 @@ CONFIG_BRIDGE_EBT_REDIRECT=m CONFIG_BRIDGE_EBT_T_FILTER=m CONFIG_BRIDGE_NETFILTER=m CONFIG_BRIDGE_NF_EBTABLES=m +CONFIG_BRIDGE_NF_EBTABLES_LEGACY=m +CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y +CONFIG_CRYPTO_SHA1=m CONFIG_DUMMY=m +CONFIG_INET_DIAG=m CONFIG_INET_ESP=m -CONFIG_IP_NF_MATCH_RPFILTER=m -CONFIG_IP6_NF_MATCH_RPFILTER=m -CONFIG_IP_NF_IPTABLES=m +CONFIG_INET_SCTP_DIAG=m +CONFIG_IP6_NF_FILTER=m CONFIG_IP6_NF_IPTABLES=m +CONFIG_IP6_NF_IPTABLES_LEGACY=m +CONFIG_IP6_NF_MATCH_RPFILTER=m +CONFIG_IP6_NF_RAW=m CONFIG_IP_NF_FILTER=m -CONFIG_IP6_NF_FILTER=m +CONFIG_IP_NF_IPTABLES=m +CONFIG_IP_NF_IPTABLES_LEGACY=m +CONFIG_IP_NF_MATCH_RPFILTER=m +CONFIG_IP_NF_NAT=m CONFIG_IP_NF_RAW=m -CONFIG_IP6_NF_RAW=m CONFIG_IP_SCTP=m +CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IP_VS=m CONFIG_IP_VS_PROTO_TCP=y CONFIG_IP_VS_RR=m -CONFIG_IPV6=y -CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_MACVLAN=m CONFIG_NAMESPACES=y CONFIG_NET_CLS_U32=m -CONFIG_NET_L3_MASTER_DEV=y -CONFIG_NET_NS=y -CONFIG_NET_SCH_NETEM=m -CONFIG_NET_SCH_HTB=m -CONFIG_NET_IPIP=m -CONFIG_NET_VRF=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NETFILTER_NETLINK=m CONFIG_NETFILTER_NETLINK_QUEUE=m CONFIG_NETFILTER_SYNPROXY=m CONFIG_NETFILTER_XTABLES=m -CONFIG_NETFILTER_XT_NAT=m +CONFIG_NETFILTER_XTABLES_LEGACY=y CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m CONFIG_NETFILTER_XT_MATCH_STATE=m CONFIG_NETFILTER_XT_MATCH_STRING=m +CONFIG_NETFILTER_XT_NAT=m CONFIG_NETFILTER_XT_TARGET_REDIRECT=m +CONFIG_NET_IPIP=m +CONFIG_NET_L3_MASTER_DEV=y +CONFIG_NET_NS=y +CONFIG_NET_PKTGEN=m +CONFIG_NET_SCH_HTB=m +CONFIG_NET_SCH_NETEM=m 
+CONFIG_NET_VRF=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_EVENTS=y CONFIG_NF_CONNTRACK_FTP=m CONFIG_NF_CONNTRACK_MARK=y +CONFIG_NF_CONNTRACK_PROCFS=y CONFIG_NF_CONNTRACK_ZONES=y CONFIG_NF_CT_NETLINK=m CONFIG_NF_CT_PROTO_SCTP=y CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NF_LOG_IPV4=m CONFIG_NF_LOG_IPV6=m CONFIG_NF_NAT=m -CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_NAT_MASQUERADE=y +CONFIG_NF_NAT_REDIRECT=y CONFIG_NF_TABLES=m CONFIG_NF_TABLES_BRIDGE=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_IPV4=y CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_NETDEV=y -CONFIG_NF_FLOW_TABLE_INET=m CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_COMPAT=m CONFIG_NFT_CT=m @@ -81,9 +92,10 @@ CONFIG_NFT_QUEUE=m CONFIG_NFT_QUOTA=m CONFIG_NFT_REDIR=m CONFIG_NFT_SYNPROXY=m +CONFIG_NFT_TPROXY=m +CONFIG_TUN=m CONFIG_VETH=m CONFIG_VLAN_8021Q=m -CONFIG_XFRM_USER=m +CONFIG_VXLAN=m CONFIG_XFRM_STATISTICS=y -CONFIG_NET_PKTGEN=m -CONFIG_TUN=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/netfilter/conntrack_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_clash.sh new file mode 100755 index 000000000000..7fc6c5dbd551 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_clash.sh @@ -0,0 +1,174 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +clash_resolution_active=0 +dport=22111 +ret=0 + +cleanup() +{ + # netns cleanup also zaps any remaining socat echo server. + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT + +setup_ns nsclient1 nsclient2 nsrouter + +ip netns exec "$nsrouter" nft -f -<<EOF +table ip t { + chain lb { + meta l4proto udp dnat to numgen random mod 3 map { 0 : 10.0.2.1 . 9000, 1 : 10.0.2.1 . 9001, 2 : 10.0.2.1 . 
9002 } + } + + chain prerouting { + type nat hook prerouting priority dstnat + + udp dport $dport counter jump lb + } + + chain output { + type nat hook output priority dstnat + + udp dport $dport counter jump lb + } +} +EOF + +load_simple_ruleset() +{ +ip netns exec "$1" nft -f -<<EOF +table ip t { + chain forward { + type filter hook forward priority 0 + + ct state new counter + } +} +EOF +} + +spawn_servers() +{ + local ns="$1" + local ports="9000 9001 9002" + + for port in $ports; do + ip netns exec "$ns" socat UDP-RECVFROM:$port,fork PIPE 2>/dev/null & + done + + for port in $ports; do + wait_local_port_listen "$ns" $port udp + done +} + +add_addr() +{ + local ns="$1" + local dev="$2" + local i="$3" + local j="$4" + + ip -net "$ns" link set "$dev" up + ip -net "$ns" addr add "10.0.$i.$j/24" dev "$dev" +} + +ping_test() +{ + local ns="$1" + local daddr="$2" + + if ! ip netns exec "$ns" ping -q -c 1 $daddr > /dev/null;then + echo "FAIL: ping from $ns to $daddr" + exit 1 + fi +} + +run_one_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local entries + local cre + + if ! ip netns exec "$ns" timeout 30 ./udpclash $daddr $dport;then + echo "INFO: did not receive expected number of replies for $daddr:$dport" + ip netns exec "$ctns" conntrack -S + # don't fail: check if clash resolution triggered after all. + fi + + entries=$(ip netns exec "$ctns" conntrack -S | wc -l) + cre=$(ip netns exec "$ctns" conntrack -S | grep "clash_resolve=0" | wc -l) + + if [ "$cre" -ne "$entries" ];then + clash_resolution_active=1 + return 0 + fi + + # not a failure: clash resolution logic did not trigger. + # With right timing, xmit completed sequentially and + # no parallel insertion occurs. + return $ksft_skip +} + +run_clash_test() +{ + local ns="$1" + local ctns="$2" + local daddr="$3" + local dport="$4" + local softerr=0 + + for i in $(seq 1 10);do + run_one_clash_test "$ns" "$ctns" "$daddr" "$dport" + local rv=$? 
+ if [ $rv -eq 0 ];then + echo "PASS: clash resolution test for $daddr:$dport on attempt $i" + return 0 + elif [ $rv -eq $ksft_skip ]; then + softerr=1 + fi + done + + [ $softerr -eq 1 ] && echo "SKIP: clash resolution for $daddr:$dport did not trigger" +} + +ip link add veth0 netns "$nsclient1" type veth peer name veth0 netns "$nsrouter" +ip link add veth0 netns "$nsclient2" type veth peer name veth1 netns "$nsrouter" +add_addr "$nsclient1" veth0 1 1 +add_addr "$nsclient2" veth0 2 1 +add_addr "$nsrouter" veth0 1 99 +add_addr "$nsrouter" veth1 2 99 + +ip -net "$nsclient1" route add default via 10.0.1.99 +ip -net "$nsclient2" route add default via 10.0.2.99 +ip netns exec "$nsrouter" sysctl -q net.ipv4.ip_forward=1 + +ping_test "$nsclient1" 10.0.1.99 +ping_test "$nsclient1" 10.0.2.1 +ping_test "$nsclient2" 10.0.1.1 + +spawn_servers "$nsclient2" + +# exercise clash resolution with nat: +# nsrouter is supposed to dnat to 10.0.2.1:900{0,1,2}. +run_clash_test "$nsclient1" "$nsrouter" 10.0.1.99 "$dport" + +# exercise clash resolution without nat. 
+load_simple_ruleset "$nsclient2" +run_clash_test "$nsclient2" "$nsclient2" 127.0.0.1 9001 + +if [ $clash_resolution_active -eq 0 ];then + [ "$ret" -eq 0 ] && ret=$ksft_skip + echo "SKIP: Clash resolution did not trigger" +fi + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c index bd9317bf5ada..5cecb8a1bc94 100644 --- a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c @@ -10,7 +10,7 @@ #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> #include <linux/netfilter/nf_conntrack_tcp.h> -#include "../../kselftest_harness.h" +#include "kselftest_harness.h" #define TEST_ZONE_ID 123 #define NF_CT_DEFAULT_ZONE_ID 0 @@ -43,6 +43,8 @@ static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type, mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, @@ -71,6 +73,8 @@ static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type, mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int build_cta_proto(struct nlmsghdr *nlh) @@ -90,6 +94,8 @@ static int build_cta_proto(struct nlmsghdr *nlh) mnl_attr_nest_end(nlh, nest_proto); mnl_attr_nest_end(nlh, nest); + + return 0; } static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, @@ -98,7 +104,7 @@ static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh, char buf[MNL_SOCKET_BUFFER_SIZE]; struct nlmsghdr *rplnlh; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); @@ -207,6 +213,7 @@ static int conntrack_data_generate_v6(struct mnl_socket *sock, static int count_entries(const struct nlmsghdr *nlh, void *data) { reply_counter++; + return MNL_CB_OK; } static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) 
@@ -216,7 +223,7 @@ static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone) struct nfgenmsg *nfh; struct nlattr *nest; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); @@ -263,7 +270,7 @@ static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone) struct nfgenmsg *nfh; struct nlattr *nest; unsigned int portid; - int err, ret; + int ret; portid = mnl_socket_get_portid(sock); diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh new file mode 100755 index 000000000000..8b0935385849 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +exec unshare -n ./conntrack_dump_flush diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh new file mode 100755 index 000000000000..615fe3c6f405 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh @@ -0,0 +1,515 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +checktool "conntrack --version" "run test without conntrack" +checktool "nft --version" "run test without nft tool" + +init_net_max=0 +ct_buckets=0 +tmpfile="" +tmpfile_proc="" +tmpfile_uniq="" +ret=0 +have_socat=0 + +socat -h > /dev/null && have_socat=1 + +insert_count=2000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && insert_count=400 + +modprobe -q nf_conntrack +if ! 
sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then + echo "SKIP: conntrack sysctls not available" + # same skip-code variable the sibling netfilter tests use; a bare "exit" would return the echo's status (success) + exit $ksft_skip +fi + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1 +ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1 + +cleanup() { + cleanup_all_ns + + rm -f "$tmpfile" "$tmpfile_proc" "$tmpfile_uniq" + + # restore original sysctl setting + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max + sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets +} +trap cleanup EXIT + +check_max_alias() +{ + local expected="$1" + # old name, expected to alias to the first, i.e. changing one + # changes the other as well. + local lv=$(sysctl -n net.nf_conntrack_max) + + if [ $expected -ne "$lv" ];then + echo "nf_conntrack_max sysctls should have identical values" + exit 1 + fi +} + +insert_ctnetlink() { + local ns="$1" + local count="$2" + local i=0 + local bulk=16 + + while [ $i -lt $count ] ;do + ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \ + if ! 
conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \ + --protonum 17 --timeout 3600 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \ + return;\ + fi & \ + done ; wait" 2>/dev/null + + i=$((i+bulk)) + done +} + +check_ctcount() { + local ns="$1" + local count="$2" + local msg="$3" + + local now=$(ip netns exec "$ns" conntrack -C) + + if [ $now -ne "$count" ] ;then + echo "expected $count entries in $ns, not $now: $msg" + exit 1 + fi + + echo "PASS: got $count connections: $msg" +} + +ctresize() { + local duration="$1" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM + now=$(date +%s) + done +} + +do_rsleep() { + local limit="$1" + local r=$RANDOM + + r=$((r%limit)) + sleep "$r" +} + +ct_flush_once() { + local ns="$1" + + ip netns exec "$ns" conntrack -F 2>/dev/null +} + +ctflush() { + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + do_rsleep "$duration" + + while [ $now -lt $end ]; do + ct_flush_once "$ns" + do_rsleep "$duration" + now=$(date +%s) + done +} + +ct_pingflood() +{ + local ns="$1" + local duration="$2" + local msg="$3" + local now=$(date +%s) + local end=$((now + duration)) + local j=0 + local k=0 + + while [ $now -lt $end ]; do + j=$((j%256)) + k=$((k%256)) + + ip netns exec "$ns" bash -c \ + "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1 + + j=$((j+1)) + + if [ $j -eq 256 ];then + k=$((k+1)) + fi + + now=$(date +%s) + done + + wait +} + +ct_udpflood() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ $have_socat -ne "1" ] && return + + while [ $now -lt $end ]; do +ip netns exec "$ns" bash<<"EOF" + for i in $(seq 1 100);do + dport=$(((RANDOM%65536)+1)) + + echo bar | 
socat -u STDIN UDP:"127.0.0.1:$dport" & + done > /dev/null 2>&1 + wait +EOF + now=$(date +%s) + done +} + +ct_udpclash() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + [ -x udpclash ] || return + + while [ $now -lt $end ]; do + ip netns exec "$ns" timeout 30 ./udpclash 127.0.0.1 $((RANDOM%65536)) > /dev/null 2>&1 + + now=$(date +%s) + done +} + +# dump to /dev/null. We don't want dumps to cause infinite loops +# or use-after-free even when conntrack table is altered while dumps +# are in progress. +ct_nulldump() +{ + local ns="$1" + + ip netns exec "$ns" conntrack -L > /dev/null 2>&1 & + + # Don't require /proc support in conntrack + if [ -r /proc/self/net/nf_conntrack ] ; then + ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null & + fi + + wait +} + +ct_nulldump_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + ct_nulldump "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done +} + +change_timeouts() +{ + local ns="$1" + local r1=$((RANDOM%2)) + local r2=$((RANDOM%2)) + + [ "$r1" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=$((RANDOM%5)) + [ "$r2" -eq 1 ] && ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=$((RANDOM%5)) +} + +ct_change_timeouts_loop() +{ + local ns="$1" + local duration="$2" + local now=$(date +%s) + local end=$((now + duration)) + + while [ $now -lt $end ]; do + change_timeouts "$ns" + sleep $((RANDOM%2)) + now=$(date +%s) + done + + # restore defaults + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=30 + ip netns exec "$ns" sysctl -q net.netfilter.nf_conntrack_udp_timeout=30 +} + +check_taint() +{ + local tainted_then="$1" + local msg="$2" + + local tainted_now=0 + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + + if [ "$tainted_now" -eq 0 ];then + echo 
"PASS: $msg" + else + echo "TAINT: $msg" + dmesg + exit 1 + fi +} + +insert_flood() +{ + local n="$1" + local timeout="$2" + local r=0 + + r=$((RANDOM%$insert_count)) + + ct_pingflood "$n" "$timeout" "floodresize" & + ct_udpflood "$n" "$timeout" & + ct_udpclash "$n" "$timeout" & + + insert_ctnetlink "$n" "$r" & + ctflush "$n" "$timeout" & + ct_nulldump_loop "$n" "$timeout" & + ct_change_timeouts_loop "$n" "$timeout" & + + wait +} + +test_floodresize_all() +{ + local timeout=20 + local n="" + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + for n in "$nsclient1" "$nsclient2";do + insert_flood "$n" "$timeout" & + done + + # resize table constantly while flood/insert/dump/flushs + # are happening in parallel. + ctresize "$timeout" + + # wait for subshells to complete, everything is limited + # by $timeout. + wait + + check_taint "$tainted_then" "resize+flood" +} + +check_dump() +{ + local ns="$1" + local protoname="$2" + local c=0 + local proto=0 + local proc=0 + local unique="" + local lret=0 + + # NOTE: assumes timeouts are large enough to not have + # expirations in all following tests. + l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | sort | tee "$tmpfile" | wc -l) + c=$(ip netns exec "$ns" conntrack -C) + + if [ "$c" -eq 0 ]; then + echo "FAIL: conntrack count for $ns is 0" + lret=1 + fi + + if [ "$c" -ne "$l" ]; then + echo "FAIL: conntrack count inconsistency for $ns -L: $c != $l" + lret=1 + fi + + # check the dump we retrieved is free of duplicated entries. + unique=$(uniq "$tmpfile" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$unique" ]; then + echo "FAIL: listing contained redundant entries for $ns: $l != $unique" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter. 
+ proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | sort | uniq | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proto" ]; then + echo "FAIL: dump inconsistency for $ns -L --proto $protoname: $l != $proto" + diff -u "$tmpfile" "$tmpfile_uniq" + lret=1 + fi + + if [ -r /proc/self/net/nf_conntrack ] ; then + proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | tee \"$tmpfile_proc\" | wc -l") + + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency for $ns: $l != $proc" + lret=1 + fi + + proc=$(uniq "$tmpfile_proc" | tee "$tmpfile_uniq" | wc -l) + if [ "$l" -ne "$proc" ]; then + echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc" + diff -u "$tmpfile_proc" "$tmpfile_uniq" + lret=1 + fi + fi + + if [ $lret -eq 0 ];then + echo "PASS: dump in netns $ns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)" + else + echo "FAIL: dump in netns $ns had different entry count (-C $c, -L $l, -p $proto, /proc $proc)" + ret=1 + fi +} + +test_dump_all() +{ + local timeout=3 + local tainted_then="" + + read tainted_then < /proc/sys/kernel/tainted + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_icmp_timeout=3600 + + ct_pingflood "$nsclient1" $timeout "dumpall" & + insert_ctnetlink "$nsclient2" $insert_count + + wait + + check_dump "$nsclient1" "icmp" + check_dump "$nsclient2" "udp" + + check_taint "$tainted_then" "test parallel conntrack dumps" +} + +check_sysctl_immutable() +{ + local ns="$1" + local name="$2" + local failhard="$3" + local o=0 + local n=0 + + o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + n=$((o+1)) + + # return value isn't reliable, need to read it back + ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null + + n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null) + + [ -z "$n" ] && return 1 + + if [ $o -ne $n ]; then + if [ $failhard -gt 0 ] ;then + echo "FAIL: net.$name should not be 
changeable from namespace (now $n)" + ret=1 + fi + return 0 + fi + + return 1 +} + +test_conntrack_max_limit() +{ + sysctl -q net.netfilter.nf_conntrack_max=100 + insert_ctnetlink "$nsclient1" 101 + + # check netns is clamped by init_net, i.e., either netns follows + # init_net value, or a higher pernet limit (compared to init_net) is ignored. + check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound" + + sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +} + +test_conntrack_disable() +{ + local timeout=2 + + # disable conntrack pickups + ip netns exec "$nsclient1" nft flush table ip test_ct + + ct_flush_once "$nsclient1" + ct_flush_once "$nsclient2" + + ct_pingflood "$nsclient1" "$timeout" "conntrack disable" + ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1 + + # Disabled, should not have picked up any connection. + check_ctcount "$nsclient1" 0 "conntrack disabled" + + # This one is still active, expect 1 connection. + check_ctcount "$nsclient2" 1 "conntrack enabled" +} + +init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) + +check_max_alias $init_net_max + +sysctl -q net.netfilter.nf_conntrack_max="262000" +check_max_alias 262000 + +setup_ns nsclient1 nsclient2 + +# check this only works from init_net +for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do + check_sysctl_immutable "$nsclient1" "net.$n" 1 +done + +# won't work on older kernels. If it works, check that the netns obeys the limit +if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then + # subtest: if pernet is changeable, check that reducing it in pernet + # limits the pernet entries. Inverse, pernet clamped by a lower init_net + # setting, is already checked by "test_conntrack_max_limit" test. 
+ + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1 + insert_ctnetlink "$nsclient1" 2 + check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound" + ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max +fi + +for n in "$nsclient1" "$nsclient2";do +# enable conntrack in both namespaces +ip netns exec "$n" nft -f - <<EOF +table ip test_ct { + chain input { + type filter hook input priority 0 + ct state new counter + } +} +EOF +done + +tmpfile=$(mktemp) +tmpfile_proc=$(mktemp) +tmpfile_uniq=$(mktemp) +test_conntrack_max_limit +test_dump_all +test_floodresize_all +test_conntrack_disable + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c new file mode 100644 index 000000000000..507930cee8cb --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Needs something like: + * + * iptables -t nat -A POSTROUTING -o nomatch -j MASQUERADE + * + * so NAT engine attaches a NAT null-binding to each connection. + * + * With unmodified kernels, child or parent will exit with + * "Port number changed" error, even though no port translation + * was requested. 
+ */ + +#include <errno.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <time.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <sys/wait.h> + +#define LEN 512 +#define PORT 56789 +#define TEST_TIME 5 + +static void die(const char *e) +{ + perror(e); + exit(111); +} + +static void die_port(uint16_t got, uint16_t want) +{ + fprintf(stderr, "Port number changed, wanted %d got %d\n", want, ntohs(got)); + exit(1); +} + +static int udp_socket(void) +{ + static const struct timeval tv = { + .tv_sec = 1, + }; + int fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + + if (fd < 0) + die("socket"); + + setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)); + return fd; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in sa1 = { + .sin_family = AF_INET, + }; + struct sockaddr_in sa2 = { + .sin_family = AF_INET, + }; + int s1, s2, status; + time_t end, now; + socklen_t plen; + char buf[LEN]; + bool child; + + sa1.sin_port = htons(PORT); + sa2.sin_port = htons(PORT + 1); + + s1 = udp_socket(); + s2 = udp_socket(); + + inet_pton(AF_INET, "127.0.0.11", &sa1.sin_addr); + inet_pton(AF_INET, "127.0.0.12", &sa2.sin_addr); + + if (bind(s1, (struct sockaddr *)&sa1, sizeof(sa1)) < 0) + die("bind 1"); + if (bind(s2, (struct sockaddr *)&sa2, sizeof(sa2)) < 0) + die("bind 2"); + + child = fork() == 0; + + now = time(NULL); + end = now + TEST_TIME; + + while (now < end) { + struct sockaddr_in peer; + socklen_t plen = sizeof(peer); + + now = time(NULL); + + if (child) { + if (sendto(s1, buf, LEN, 0, (struct sockaddr *)&sa2, sizeof(sa2)) != LEN) + continue; + + if (recvfrom(s2, buf, LEN, 0, (struct sockaddr *)&peer, &plen) < 0) + die("child recvfrom"); + + if (peer.sin_port != htons(PORT)) + die_port(peer.sin_port, PORT); + } else { + if (sendto(s2, buf, LEN, 0, (struct sockaddr *)&sa1, sizeof(sa1)) != LEN) + continue; + + if (recvfrom(s1, buf, LEN, 0, (struct sockaddr *)&peer, 
&plen) < 0) + die("parent recvfrom"); + + if (peer.sin_port != htons((PORT + 1))) + die_port(peer.sin_port, PORT + 1); + } + } + + if (child) + return 0; + + wait(&status); + + if (WIFEXITED(status)) + return WEXITSTATUS(status); + + return 1; +} diff --git a/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh new file mode 100755 index 000000000000..a24c896347a8 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/conntrack_reverse_clash.sh @@ -0,0 +1,51 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +cleanup() +{ + cleanup_all_ns +} + +checktool "nft --version" "run test without nft" +checktool "conntrack --version" "run test without conntrack" + +trap cleanup EXIT + +setup_ns ns0 + +# make loopback connections get nat null bindings assigned +ip netns exec "$ns0" nft -f - <<EOF +table ip nat { + chain POSTROUTING { + type nat hook postrouting priority srcnat; policy accept; + oifname "nomatch" counter packets 0 bytes 0 masquerade + } +} +EOF + +do_flush() +{ + local end + local now + + now=$(date +%s) + end=$((now + 5)) + + while [ $now -lt $end ];do + ip netns exec "$ns0" conntrack -F 2>/dev/null + now=$(date +%s) + done +} + +do_flush & + +if ip netns exec "$ns0" ./conntrack_reverse_clash; then + echo "PASS: No SNAT performed for null bindings" +else + echo "ERROR: SNAT performed without any matching snat rule" + exit 1 +fi + +exit 0 diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh index 073e8e62d350..207b79932d91 100755 --- a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh +++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh @@ -51,10 +51,6 @@ trap cleanup EXIT setup_ns ns0 ns1 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns0" sysctl -q -w 
net.ipv4.conf.all.rp_filter=0 - if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: Could not add veth device" exit $ksft_skip diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh index 4ceee9fb3949..9c9d5b38ab71 100755 --- a/tools/testing/selftests/net/netfilter/ipvs.sh +++ b/tools/testing/selftests/net/netfilter/ipvs.sh @@ -97,7 +97,7 @@ cleanup() { } server_listen() { - ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" & + ip netns exec "$ns2" timeout 5 socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" & server_pid=$! sleep 0.2 } @@ -129,9 +129,6 @@ test_dr() { # avoid incorrect arp response ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - # avoid reverse route lookup - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service @@ -154,7 +151,7 @@ test_nat() { test_tun() { ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0 - ip netns exec "${ns1}" modprobe -q ipip + modprobe -q ipip ip netns exec "${ns1}" ip link set tunl0 up ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0 ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0 @@ -163,13 +160,10 @@ test_tun() { ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port} ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1 - ip netns exec "${ns2}" modprobe -q ipip ip netns exec "${ns2}" ip link set tunl0 up ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2 - ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0 ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0 - 
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0 ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1 test_service diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh index 1014551dd769..6731fe1eaf2e 100755 --- a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh +++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh @@ -17,9 +17,31 @@ cleanup() checktool "socat -h" "run test without socat" checktool "iptables --version" "run test without iptables" +checktool "conntrack --version" "run test without conntrack" trap cleanup EXIT +connect_done() +{ + local ns="$1" + local port="$2" + + ip netns exec "$ns" ss -nt -o state established "dport = :$port" | grep -q "$port" +} + +check_ctstate() +{ + local ns="$1" + local dp="$2" + + if ! ip netns exec "$ns" conntrack --get -s 192.168.1.2 -d 192.168.1.1 -p tcp \ + --sport 10000 --dport "$dp" --state ESTABLISHED > /dev/null 2>&1;then + echo "FAIL: Did not find expected state for dport $2" + ip netns exec "$ns" bash -c 'conntrack -L; conntrack -S; ss -nt' + ret=1 + fi +} + setup_ns ns1 ns2 # Connect the namespaces using a veth pair @@ -44,15 +66,18 @@ socatpid=$! ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000" # add a virtual IP using DNAT -ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 +ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201 || exit 1 # ... and route it to the other namespace ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1 -# add a persistent connection from the other namespace -ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & +# listener should be up by now, wait if it isn't yet. 
+wait_local_port_listen "$ns1" 5201 tcp -sleep 1 +# add a persistent connection from the other namespace +sleep 10 | ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null & +cpid0=$! +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" "5201" # ip daddr:dport will be rewritten to 192.168.1.1 5201 # NAT must reallocate source port 10000 because @@ -71,26 +96,25 @@ fi ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201 ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201 -sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null & +cpid1=$! -# if connect succeeds, client closes instantly due to EOF on stdin. -# if connect hangs, it will time out after 5s. -echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & +sleep 5 | ip netns exec "$ns2" socat -T 5 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null & cpid2=$! -time_then=$(date +%s) -wait $cpid2 -rv=$? -time_now=$(date +%s) +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5202 +busywait "$BUSYWAIT_TIMEOUT" connect_done "$ns2" 5203 -# Check how much time has elapsed, expectation is for -# 'cpid2' to connect and then exit (and no connect delay). 
-delta=$((time_now - time_then)) +check_ctstate "$ns1" 5202 +check_ctstate "$ns1" 5203 -if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then +kill $socatpid $cpid0 $cpid1 $cpid2 +socatpid=0 + +if [ $ret -eq 0 ]; then echo "PASS: could connect to service via redirected ports" else - echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)" + echo "FAIL: socat cannot connect to service via redirect" ret=1 fi diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh index 902f8114bc80..87f2b4c725aa 100755 --- a/tools/testing/selftests/net/netfilter/nft_audit.sh +++ b/tools/testing/selftests/net/netfilter/nft_audit.sh @@ -48,12 +48,31 @@ logread_pid=$! trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT exec 3<"$logfile" +lsplit='s/^\(.*\) entries=\([^ ]*\) \(.*\)$/pfx="\1"\nval="\2"\nsfx="\3"/' +summarize_logs() { + sum=0 + while read line; do + eval $(sed "$lsplit" <<< "$line") + [[ $sum -gt 0 ]] && { + [[ "$pfx $sfx" == "$tpfx $tsfx" ]] && { + let "sum += val" + continue + } + echo "$tpfx entries=$sum $tsfx" + } + tpfx="$pfx" + tsfx="$sfx" + sum=$val + done + echo "$tpfx entries=$sum $tsfx" +} + do_test() { # (cmd, log) echo -n "testing for cmd: $1 ... " cat <&3 >/dev/null $1 >/dev/null || exit 1 sleep 0.1 - res=$(diff -a -u <(echo "$2") - <&3) + res=$(diff -a -u <(echo "$2") <(summarize_logs <&3)) [ $? 
-eq 0 ] && { echo "OK"; return; } echo "FAIL" grep -v '^\(---\|+++\|@@\)' <<< "$res" @@ -152,31 +171,17 @@ do_test 'nft reset rules t1 c2' \ 'table=t1 family=2 entries=3 op=nft_reset_rule' do_test 'nft reset rules table t1' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule' do_test 'nft reset rules t2 c3' \ -'table=t2 family=2 entries=189 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=126 op=nft_reset_rule' +'table=t2 family=2 entries=503 op=nft_reset_rule' do_test 'nft reset rules t2' \ -'table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=186 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=129 op=nft_reset_rule' +'table=t2 family=2 entries=509 op=nft_reset_rule' do_test 'nft reset rules' \ -'table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t1 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=3 op=nft_reset_rule -table=t2 family=2 entries=180 op=nft_reset_rule -table=t2 family=2 entries=188 op=nft_reset_rule -table=t2 family=2 entries=135 op=nft_reset_rule' +'table=t1 family=2 entries=9 op=nft_reset_rule +table=t2 family=2 entries=509 op=nft_reset_rule' # resetting sets and elements @@ -200,13 +205,11 @@ do_test 'nft reset counters t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset counters t2' \ -'table=t2 family=2 entries=342 op=nft_reset_obj -table=t2 family=2 entries=158 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset counters' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=341 op=nft_reset_obj -table=t2 family=2 entries=159 op=nft_reset_obj' +table=t2 family=2 
entries=500 op=nft_reset_obj' # resetting quotas @@ -217,13 +220,11 @@ do_test 'nft reset quotas t1' \ 'table=t1 family=2 entries=1 op=nft_reset_obj' do_test 'nft reset quotas t2' \ -'table=t2 family=2 entries=315 op=nft_reset_obj -table=t2 family=2 entries=185 op=nft_reset_obj' +'table=t2 family=2 entries=500 op=nft_reset_obj' do_test 'nft reset quotas' \ 'table=t1 family=2 entries=1 op=nft_reset_obj -table=t2 family=2 entries=314 op=nft_reset_obj -table=t2 family=2 entries=186 op=nft_reset_obj' +table=t2 family=2 entries=500 op=nft_reset_obj' # deleting rules diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh index 6d66240e149c..ad97c6227f35 100755 --- a/tools/testing/selftests/net/netfilter/nft_concat_range.sh +++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh @@ -15,10 +15,12 @@ source lib.sh # Available test groups: # - reported_issues: check for issues that were reported in the past # - correctness: check that packets match given entries, and only those +# - correctness_large: same but with additional non-matching entries # - concurrency: attempt races between insertion, deletion and lookup # - timeout: check that packets match entries until they expire # - performance: estimate matching rate, compare with rbtree and hash baselines -TESTS="reported_issues correctness concurrency timeout" +TESTS="reported_issues correctness correctness_large concurrency timeout" + [ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}" # Set types, defined by TYPE_ variables below @@ -27,7 +29,7 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto net6_port_net6_port net_port_mac_proto_net" # Reported bugs, also described by TYPE_ variables below -BUGS="flush_remove_add reload" +BUGS="flush_remove_add reload net_port_proto_match avx2_mismatch doublecreate" # List of possible paths to pktgen script from kernel tree for performance tests 
PKTGEN_SCRIPT_PATHS=" @@ -371,6 +373,53 @@ race_repeat 0 perf_duration 0 " +TYPE_net_port_proto_match=" +display net,port,proto +type_spec ipv4_addr . inet_service . inet_proto +chain_spec ip daddr . udp dport . meta l4proto +dst addr4 port proto +src +start 1 +count 9 +src_delta 9 +tools sendip bash +proto udp + +race_repeat 0 + +perf_duration 0 +" + +TYPE_avx2_mismatch=" +display avx2 false match +type_spec inet_proto . ipv6_addr +chain_spec meta l4proto . ip6 daddr +dst proto addr6 +src +start 1 +count 1 +src_delta 1 +tools ping +proto icmp6 + +race_repeat 0 + +perf_duration 0 +" + + +TYPE_doublecreate=" +display cannot create same element twice +type_spec ipv4_addr . ipv4_addr +chain_spec ip saddr . ip daddr +dst addr4 +proto icmp + +race_repeat 0 + +perf_duration 0 +" + # Set template for all tests, types and rules are filled in depending on test set_template=' flush ruleset @@ -382,6 +431,7 @@ table inet filter { set test { type ${type_spec} + counter flags interval,timeout } @@ -1121,9 +1171,18 @@ del() { fi } -# Return packet count from 'test' counter in 'inet filter' table +# Return packet count for elem $1 from 'test' counter in 'inet filter' table count_packets() { found=0 + for token in $(nft reset element inet filter test "${1}" ); do + [ ${found} -eq 1 ] && echo "${token}" && return + [ "${token}" = "packets" ] && found=1 + done +} + +# Return packet count from 'test' counter in 'inet filter' table +count_packets_nomatch() { + found=0 for token in $(nft list counter inet filter test); do [ ${found} -eq 1 ] && echo "${token}" && return [ "${token}" = "packets" ] && found=1 @@ -1169,6 +1228,10 @@ perf() { # Set MAC addresses, send single packet, check that it matches, reset counter send_match() { + local elem="$1" + + shift + ip link set veth_a address "$(format_mac "${1}")" ip -n B link set veth_b address "$(format_mac "${2}")" @@ -1179,7 +1242,7 @@ send_match() { eval src_"$f"=\$\(format_\$f "${2}"\) done eval send_\$proto - if [ "$(count_packets)" != 
"1" ]; then + if [ "$(count_packets "$elem")" != "1" ]; then err "${proto} packet to:" err " $(for f in ${dst}; do eval format_\$f "${1}"; printf ' '; done)" @@ -1205,7 +1268,7 @@ send_nomatch() { eval src_"$f"=\$\(format_\$f "${2}"\) done eval send_\$proto - if [ "$(count_packets)" != "0" ]; then + if [ "$(count_packets_nomatch)" != "0" ]; then err "${proto} packet to:" err " $(for f in ${dst}; do eval format_\$f "${1}"; printf ' '; done)" @@ -1218,15 +1281,54 @@ send_nomatch() { fi } +maybe_send_nomatch() { + local elem="$1" + local what="$4" + + [ $((RANDOM%20)) -gt 0 ] && return + + dst_addr4="$2" + dst_port="$3" + send_udp + + if [ "$(count_packets_nomatch)" != "0" ]; then + err "Packet to $dst_addr4:$dst_port did match $what" + err "$(nft -a list ruleset)" + return 1 + fi +} + +maybe_send_match() { + local elem="$1" + local what="$4" + + [ $((RANDOM%20)) -gt 0 ] && return + + dst_addr4="$2" + dst_port="$3" + send_udp + + if [ "$(count_packets "{ $elem }")" != "1" ]; then + err "Packet to $dst_addr4:$dst_port did not match $what" + err "$(nft -a list ruleset)" + return 1 + fi + nft reset counter inet filter test >/dev/null + nft reset element inet filter test "{ $elem }" >/dev/null +} + # Correctness test template: # - add ranged element, check that packets match it # - check that packets outside range don't match it # - remove some elements, check that packets don't match anymore -test_correctness() { - setup veth send_"${proto}" set || return ${ksft_skip} - +test_correctness_main() { range_size=1 + + send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 + for i in $(seq "${start}" $((start + count))); do + local elem="" + end=$((start + range_size)) # Avoid negative or zero-sized port ranges @@ -1237,15 +1339,16 @@ test_correctness() { srcstart=$((start + src_delta)) srcend=$((end + src_delta)) - add "$(format)" || return 1 + elem="$(format)" + add "$elem" || return 1 for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do - send_match "${j}" 
$((j + src_delta)) || return 1 + send_match "$elem" "${j}" $((j + src_delta)) || return 1 done send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1 # Delete elements now and then if [ $((i % 3)) -eq 0 ]; then - del "$(format)" || return 1 + del "$elem" || return 1 for j in $(seq "$start" \ $((range_size / 2 + 1)) ${end}); do send_nomatch "${j}" $((j + src_delta)) \ @@ -1258,6 +1361,163 @@ test_correctness() { done } +test_correctness() { + setup veth send_"${proto}" set || return ${ksft_skip} + + test_correctness_main +} + +# Repeat the correctness tests, but add extra non-matching entries. +# This exercises the more compact '4 bit group' representation that +# gets picked when the default 8-bit representation exceed +# NFT_PIPAPO_LT_SIZE_HIGH bytes of memory. +# See usage of NFT_PIPAPO_LT_SIZE_HIGH in pipapo_lt_bits_adjust(). +# +# The format() helper is way too slow when generating lots of +# entries so its not used here. +test_correctness_large() { + setup veth send_"${proto}" set || return ${ksft_skip} + # number of dummy (filler) entries to add. + local dcount=16385 + + ( + echo -n "add element inet filter test { " + + case "$type_spec" in + "ether_addr . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_mac $((1000000 + i)) + printf ". 172.%i.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "inet_proto . ipv6_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . " $((RANDOM%256)) + format_addr6 $((1000000 + i)) + done + ;; + "inet_service . inet_proto") + # smaller key sizes, need more entries to hit the + # 4-bit threshold. + dcount=65536 + for i in $(seq 1 $dcount); do + local proto=$((RANDOM%256)) + + # Test uses UDP to match, as it also fails when matching + # an entry that doesn't exist, so skip 'udp' entries + # to not trigger a wrong failure. + [ $proto -eq 17 ] && proto=18 + [ $i -gt 1 ] && echo ", " + printf "%i . 
%i " $(((i%65534) + 1)) $((proto)) + done + ;; + "inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "%i . 172.%i.%i.%i " $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + format_mac $((1000000 + i)) + done + ;; + "ipv4_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + done + ;; + "ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) + format_mac $((1000000 + i)) + printf ". %i . 192.168.%i.%i" $((RANDOM%256)) $((RANDOM%256)) $((i%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . inet_proto . ipv4_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv4_addr . inet_service . ipv4_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + printf "172.%i.%i.%i . %i . 192.168.%i.%i " $((RANDOM%256)) $((RANDOM%256)) $((i%256)) $(((RANDOM%65534) + 1)) $((RANDOM%256)) $((RANDOM%256)) + done + ;; + "ipv6_addr . ether_addr") + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . 
" + format_mac $((1000000 + i)) + done + ;; + "ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1))" + done + ;; + "ipv6_addr . inet_service . ether_addr") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + done + ;; + "ipv6_addr . inet_service . ether_addr . inet_proto") + dcount=65536 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_mac $((i + 1000000)) + echo -n " . $((RANDOM%256))" + done + ;; + "ipv6_addr . inet_service . ipv6_addr . inet_service") + dcount=32768 + for i in $(seq 1 $dcount); do + [ $i -gt 1 ] && echo ", " + format_addr6 $((i + 1000000)) + echo -n " . $(((RANDOM%65534) + 1)) . " + format_addr6 $((i + 2123456)) + echo -n " . $((RANDOM%256))" + done + ;; + *) + "Unhandled $type_spec" + return 1 + esac + echo -n "}" + + ) | nft -f - || return 1 + + test_correctness_main +} + # Concurrency test template: # - add all the elements # - start a thread for each physical thread that: @@ -1380,14 +1640,17 @@ test_timeout() { range_size=1 for i in $(seq "$start" $((start + count))); do + local elem="" + end=$((start + range_size)) srcstart=$((start + src_delta)) srcend=$((end + src_delta)) - add "$(format)" || return 1 + elem="$(format)" + add "$elem" || return 1 for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do - send_match "${j}" $((j + src_delta)) || return 1 + send_match "$elem" "${j}" $((j + src_delta)) || return 1 done range_size=$((range_size + 1)) @@ -1545,7 +1808,7 @@ test_bug_reload() { srcend=$((end + src_delta)) for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do - send_match "${j}" $((j + src_delta)) || return 1 + send_match "$(format)" "${j}" $((j + src_delta)) || return 1 done 
range_size=$((range_size + 1)) @@ -1555,6 +1818,142 @@ test_bug_reload() { nft flush ruleset } +# - add ranged element, check that packets match it +# - delete element again, check it is gone +test_bug_net_port_proto_match() { + setup veth send_"${proto}" set || return ${ksft_skip} + rstart=${start} + + range_size=1 + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + local dport=$j + + elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))") + + # too slow, do not test all addresses + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "before add" || return 1 + + nft "add element inet filter test { $elem }" || return 1 + + maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "after add" || return 1 + + nft "get element inet filter test { $elem }" | grep -q "$elem" + if [ $? -ne 0 ];then + local got=$(nft "get element inet filter test { $elem }") + err "post-add: should have returned $elem but got $got" + return 1 + fi + + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "out-of-range" || return 1 + done + done + + # recheck after set was filled + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + local dport=$j + + elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 6-17 " ${i} ${j} ${dport} "$((dport+1))") + + nft "get element inet filter test { $elem }" | grep -q "$elem" + if [ $? -ne 0 ];then + local got=$(nft "get element inet filter test { $elem }") + err "post-fill: should have returned $elem but got $got" + return 1 + fi + + maybe_send_match "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "recheck" || return 1 + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d1" $((dport+1))) "recheck out-of-range" || return 1 + done + done + + # random del and re-fetch + for i in $(seq 1 10); do + for j in $(seq 1 20) ; do + local rnd=$((RANDOM%10)) + local dport=$j + local got="" + + elem=$(printf "10.%d.%d.0/24 . %d-%d0 . 
6-17 " ${i} ${j} ${dport} "$((dport+1))") + if [ $rnd -gt 0 ];then + continue + fi + + nft "delete element inet filter test { $elem }" + got=$(nft "get element inet filter test { $elem }" 2>/dev/null) + if [ $? -eq 0 ];then + err "post-delete: query for $elem returned $got instead of error." + return 1 + fi + + maybe_send_nomatch "$elem" $(printf "10.%d.%d.1" $i $j) $(printf "%d" $dport) "match after deletion" || return 1 + done + done + + nft flush ruleset +} + +test_bug_avx2_mismatch() +{ + setup veth send_"${proto}" set || return ${ksft_skip} + + local a1="fe80:dead:01ff:0a02:0b03:6007:8009:a001" + local a2="fe80:dead:01fe:0a02:0b03:6007:8009:a001" + + nft "add element inet filter test { icmpv6 . $a1 }" + + dst_addr6="$a2" + send_icmp6 + + if [ "$(count_packets "{ icmpv6 . $a1 }")" -gt "0" ]; then + err "False match for $a2" + return 1 + fi +} + +test_bug_doublecreate() +{ + local elements="1.2.3.4 . 1.2.4.1, 1.2.4.1 . 1.2.3.4" + local ret=1 + local i + + setup veth send_"${proto}" set || return ${ksft_skip} + + add "{ $elements }" || return 1 + # expected to work: 'add' on existing should be no-op. + add "{ $elements }" || return 1 + + # 'create' should return an error. + if nft create element inet filter test "{ $elements }" 2>/dev/null; then + err "Could create an existing element" + return 1 + fi +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +create element inet filter test { $elements } +EOF + ret=$? + if [ $ret -eq 0 ]; then + err "Could create element twice in one transaction" + err "$(nft -a list ruleset)" + return 1 + fi + +nft -f - <<EOF 2>/dev/null +flush set inet filter test +create element inet filter test { $elements } +EOF + ret=$? 
+ if [ $ret -ne 0 ]; then + err "Could not flush and re-create element in one transaction" + return 1 + fi + + return 0 +} + test_reported_issues() { eval test_bug_"${subtest}" } diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh index ce1451c275fd..04544905c216 100755 --- a/tools/testing/selftests/net/netfilter/nft_fib.sh +++ b/tools/testing/selftests/net/netfilter/nft_fib.sh @@ -3,6 +3,10 @@ # This tests the fib expression. # # Kselftest framework requirement - SKIP code is 4. +# +# 10.0.1.99 10.0.1.1 10.0.2.1 10.0.2.99 +# dead:1::99 dead:1::1 dead:2::1 dead:2::99 +# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2 source lib.sh @@ -45,6 +49,19 @@ table inet filter { EOF } +load_input_ruleset() { + local netns=$1 + +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table inet filter { + chain input { + type filter hook input priority 0; policy accept; + fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop + } +} +EOF +} + load_pbr_ruleset() { local netns=$1 @@ -59,6 +76,89 @@ table inet filter { EOF } +load_type_ruleset() { + local netns=$1 + + for family in ip ip6;do +ip netns exec "$netns" nft -f /dev/stdin <<EOF +table $family filter { + chain type_match_in { + fib daddr type local counter comment "daddr configured on other iface" + fib daddr . iif type local counter comment "daddr configured on iif" + fib daddr type unicast counter comment "daddr not local" + fib daddr . iif type unicast counter comment "daddr not configured on iif" + } + + chain type_match_out { + fib daddr type unicast counter + fib daddr . oif type unicast counter + fib daddr type local counter + fib daddr . 
oif type local counter + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain input { + type filter hook input priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain forward { + type filter hook forward priority 0; + icmp type echo-request counter jump type_match_in + icmpv6 type echo-request counter jump type_match_in + } + + chain output { + type filter hook output priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } + + chain postrouting { + type filter hook postrouting priority 0; + icmp type echo-request counter jump type_match_out + icmpv6 type echo-request counter jump type_match_out + } +} +EOF +done +} + +reload_type_ruleset() { + ip netns exec "$1" nft flush table ip filter + ip netns exec "$1" nft flush table ip6 filter + load_type_ruleset "$1" +} + +check_fib_type_counter_family() { + local family="$1" + local want="$2" + local ns="$3" + local chain="$4" + local what="$5" + local errmsg="$6" + + if ! 
ip netns exec "$ns" nft list chain "$family" filter "$chain" | grep "$what" | grep -q "packets $want";then + echo "Netns $ns $family fib type counter doesn't match expected packet count of $want for $what $errmsg" 1>&2 + ip netns exec "$ns" nft list chain "$family" filter "$chain" + ret=1 + return 1 + fi + + return 0 +} + +check_fib_type_counter() { + check_fib_type_counter_family "ip" "$@" || return 1 + check_fib_type_counter_family "ip6" "$@" || return 1 +} + load_ruleset_count() { local netns=$1 @@ -77,6 +177,7 @@ check_drops() { if dmesg | grep -q ' nft_rpfilter: ';then dmesg | grep ' nft_rpfilter: ' echo "FAIL: rpfilter did drop packets" + ret=1 return 1 fi @@ -151,19 +252,509 @@ test_ping() { return 0 } +test_ping_unreachable() { + local daddr4=$1 + local daddr6=$2 + + if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr4" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr4" 1>&2 + return 1 + fi + + if ip netns exec "$ns1" ping -c 1 -W 0.1 -q "$daddr6" > /dev/null; then + echo "FAIL: ${ns1} could reach $daddr6" 1>&2 + return 1 + fi + + return 0 +} + +test_fib_type() { + local notice="$1" + local errmsg="addr-on-if" + local lret=0 + + if ! load_type_ruleset "$nsrouter";then + echo "SKIP: Could not load fib type ruleset" + [ $ret -eq 0 ] && ret=$ksft_skip + return + fi + + # makes router receive packet for addresses configured on incoming + # interface. + test_ping 10.0.1.1 dead:1::1 || return 1 + + # expectation: triggers all 'local' in prerouting/input. + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type local" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + # makes router receive packet for address configured on a different (but local) + # interface. + test_ping 10.0.2.1 dead:2::1 || return 1 + + # expectation: triggers 'unicast' in prerouting/input for daddr . iif and local for 'daddr'. 
+ errmsg="addr-on-host" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type local" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + reload_type_ruleset "$nsrouter" + test_ping 10.0.2.99 dead:2::99 || return 1 + errmsg="addr-on-otherhost" + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr type unicast" "$errmsg" || lret=1 + check_fib_type_counter 2 "$nsrouter" "type_match_in" "fib daddr . iif type unicast" "$errmsg" || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: fib expression address types match ($notice)" + else + echo "FAIL: fib expression address types match ($notice)" + ret=1 + fi +} + +test_fib_vrf_dev_add_dummy() +{ + if ! ip -net "$nsrouter" link add dummy0 type dummy ;then + echo "SKIP: VRF tests: dummy device type not supported" + return 1 + fi + + if ! ip -net "$nsrouter" link add tvrf type vrf table 9876;then + echo "SKIP: VRF tests: vrf device type not supported" + return 1 + fi + + ip -net "$nsrouter" link set dummy0 master tvrf + ip -net "$nsrouter" link set dummy0 up + ip -net "$nsrouter" link set tvrf up +} + +load_ruleset_vrf() +{ +# Due to the many different possible combinations using named counters +# or one-rule-per-expected-result is complex. +# +# Instead, add dynamic sets for the fib modes +# (fib address type, fib output interface lookup .. ), +# and then add the obtained fib results to them. +# +# The test is successful if the sets contain the expected results +# and no unexpected extra entries existed. +ip netns exec "$nsrouter" nft -f - <<EOF +flush ruleset +table inet t { + set fibif4 { + typeof meta iif . ip daddr . fib daddr oif + flags dynamic + counter + } + + set fibif4iif { + typeof meta iif . ip daddr . fib daddr . iif oif + flags dynamic + counter + } + + set fibif6 { + typeof meta iif . ip6 daddr . fib daddr oif + flags dynamic + counter + } + + set fibif6iif { + typeof meta iif . ip6 daddr . fib daddr . 
iif oif + flags dynamic + counter + } + + set fibtype4 { + typeof meta iif . ip daddr . fib daddr type + flags dynamic + counter + } + + set fibtype4iif { + typeof meta iif . ip daddr . fib daddr . iif type + flags dynamic + counter + } + + set fibtype6 { + typeof meta iif . ip6 daddr . fib daddr type + flags dynamic + counter + } + + set fibtype6iif { + typeof meta iif . ip6 daddr . fib daddr . iif type + flags dynamic + counter + } + + chain fib_test { + meta nfproto ipv4 jump { + add @fibif4 { meta iif . ip daddr . fib daddr oif } + add @fibif4iif { meta iif . ip daddr . fib daddr . iif oif } + add @fibtype4 { meta iif . ip daddr . fib daddr type } + add @fibtype4iif { meta iif . ip daddr . fib daddr . iif type } + + add @fibif4 { meta iif . ip saddr . fib saddr oif } + add @fibif4iif { meta iif . ip saddr . fib saddr . iif oif } + } + + meta nfproto ipv6 jump { + add @fibif6 { meta iif . ip6 daddr . fib daddr oif } + add @fibif6iif { meta iif . ip6 daddr . fib daddr . iif oif } + add @fibtype6 { meta iif . ip6 daddr . fib daddr type } + add @fibtype6iif { meta iif . ip6 daddr . fib daddr . iif type } + + add @fibif6 { meta iif . ip6 saddr . fib saddr oif } + add @fibif6iif { meta iif . ip6 saddr . fib saddr . iif oif } + } + } + + chain prerouting { + type filter hook prerouting priority 0; + icmp type echo-request counter jump fib_test + + # neighbour discovery to be ignored. + icmpv6 type echo-request counter jump fib_test + } +} +EOF + +if [ $? -ne 0 ] ;then + echo "SKIP: Could not load ruleset for fib vrf test" + [ $ret -eq 0 ] && ret=$ksft_skip + return 1 +fi +} + +check_type() +{ + local setname="$1" + local iifname="$2" + local addr="$3" + local type="$4" + local count="$5" + local lret=0 + + [ -z "$count" ] && count=1 + + if ! ip netns exec "$nsrouter" nft get element inet t "$setname" { "$iifname" . "$addr" . "$type" } |grep -q "counter packets $count";then + echo "FAIL: did not find $iifname . $addr . 
$type in $setname with $count packets" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + # do not fail right away, delete entry if it exists so later test that + # checks for unwanted keys don't get confused by this *expected* key. + lret=1 + fi + + # delete the entry, this allows to check if anything unexpected appeared + # at the end of the test run: all dynamic sets should be empty by then. + if ! ip netns exec "$nsrouter" nft delete element inet t "$setname" { "$iifname" . "$addr" . "$type" } ; then + echo "FAIL: can't delete $iifname . $addr . $type in $setname" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + return 1 + fi + + return $lret +} + +check_local() +{ + check_type $@ "local" 1 +} + +check_unicast() +{ + check_type $@ "unicast" 1 +} + +check_rpf() +{ + check_type $@ +} + +check_fib_vrf_sets_empty() +{ + local setname="" + local lret=0 + + # A non-empty set means that we have seen unexpected packets OR + # that a fib lookup provided unexpected results. + for setname in "fibif4" "fibif4iif" "fibif6" "fibif6iif" \ + "fibtype4" "fibtype4iif" "fibtype6" "fibtype6iif";do + if ip netns exec "$nsrouter" nft list set inet t "$setname" | grep -q elements;then + echo "FAIL: $setname not empty" + ip netns exec "$nsrouter" nft list set inet t "$setname" + ret=1 + lret=1 + fi + done + + return $lret +} + +check_fib_vrf_type() +{ + local msg="$1" + + local addr + # the incoming interface is always veth0. As its not linked to a VRF, + # the 'tvrf' device should NOT show up anywhere. 
+ local ifname="veth0" + local lret=0 + + # local_veth0, local_veth1 + for addr in "10.0.1.1" "10.0.2.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "0" || lret=1 + done + for addr in "dead:1::1" "dead:2::1";do + check_local fibtype6 "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "0" || lret=1 + done + + # when restricted to the incoming interface, 10.0.1.1 should + # be 'local', but 10.0.2.1 unicast. + check_local fibtype4iif "$ifname" "10.0.1.1" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.1" || lret=1 + + # same for the ipv6 addresses. + check_local fibtype6iif "$ifname" "dead:1::1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::1" || lret=1 + + # None of these addresses should find a valid route when restricting + # to the incoming interface (we ask for daddr - 10.0.1.1/2.1 are + # reachable via 'lo'. + for addr in "10.0.1.1" "10.0.2.1" "10.9.9.1" "10.9.9.2";do + check_type fibif4iif "$ifname" "$addr" "0" || lret=1 + done + + # expect default route (veth1), dummy0 is part of VRF but iif isn't. + for addr in "10.9.9.1" "10.9.9.2";do + check_unicast fibtype4 "$ifname" "$addr" || lret=1 + check_unicast fibtype4iif "$ifname" "$addr" || lret=1 + check_type fibif4 "$ifname" "$addr" "veth1" || lret=1 + done + for addr in "dead:9::1" "dead:9::2";do + check_unicast fibtype6 "$ifname" "$addr" || lret=1 + check_unicast fibtype6iif "$ifname" "$addr" || lret=1 + check_type fibif6 "$ifname" "$addr" "veth1" || lret=1 + done + + # same for the IPv6 equivalent addresses. 
+ for addr in "dead:1::1" "dead:2::1" "dead:9::1" "dead:9::2";do + check_type fibif6iif "$ifname" "$addr" "0" || lret=1 + done + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "veth0" 5 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 5 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "veth0" 5 || lret=1 + + check_fib_vrf_sets_empty || lret=1 + + if [ $lret -eq 0 ];then + echo "PASS: $msg" + else + echo "FAIL: $msg" + ret=1 + fi +} + +check_fib_veth_vrf_type() +{ + local msg="$1" + + local addr + local ifname + local setname + local lret=0 + + # as veth0 is now part of tvrf interface, packets will be seen + # twice, once with iif veth0, then with iif tvrf. + + for ifname in "veth0" "tvrf"; do + for addr in "10.0.1.1" "10.9.9.1"; do + check_local fibtype4 "$ifname" "$addr" || lret=1 + # addr local, but nft_fib doesn't return routes with RTN_LOCAL. + check_type fibif4 "$ifname" "$addr" 0 || lret=1 + check_type fibif4iif "$ifname" "$addr" 0 || lret=1 + done + + for addr in "dead:1::1" "dead:9::1"; do + check_local fibtype6 "$ifname" "$addr" || lret=1 + # same, address is local but no route is returned for lo. 
+ check_type fibif6 "$ifname" "$addr" 0 || lret=1 + check_type fibif6iif "$ifname" "$addr" 0 || lret=1 + done + + for t in fibtype4 fibtype4iif; do + check_unicast "$t" "$ifname" 10.9.9.2 || lret=1 + done + for t in fibtype6 fibtype6iif; do + check_unicast "$t" "$ifname" dead:9::2 || lret=1 + done + + check_unicast fibtype4iif "$ifname" "10.9.9.1" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:9::1" || lret=1 + + check_unicast fibtype4 "$ifname" "10.0.2.99" || lret=1 + check_unicast fibtype4iif "$ifname" "10.0.2.99" || lret=1 + + check_unicast fibtype6 "$ifname" "dead:2::99" || lret=1 + check_unicast fibtype6iif "$ifname" "dead:2::99" || lret=1 + + check_type fibif4 "$ifname" "10.0.2.99" "veth1" || lret=1 + check_type fibif6 "$ifname" "dead:2::99" "veth1" || lret=1 + check_type fibif4 "$ifname" "10.9.9.2" "dummy0" || lret=1 + check_type fibif6 "$ifname" "dead:9::2" "dummy0" || lret=1 + + # restricted to iif -- MUST NOT provide result, its != $ifname. + check_type fibif4iif "$ifname" "10.0.2.99" 0 || lret=1 + check_type fibif6iif "$ifname" "dead:2::99" 0 || lret=1 + + check_rpf fibif4 "$ifname" "10.0.1.99" "veth0" 4 || lret=1 + check_rpf fibif6 "$ifname" "dead:1::99" "veth0" 4 || lret=1 + check_rpf fibif4iif "$ifname" "10.0.1.99" "$ifname" 4 || lret=1 + check_rpf fibif6iif "$ifname" "dead:1::99" "$ifname" 4 || lret=1 + done + + check_local fibtype4iif "veth0" "10.0.1.1" || lret=1 + check_local fibtype6iif "veth0" "dead:1::1" || lret=1 + + check_unicast fibtype4iif "tvrf" "10.0.1.1" || lret=1 + check_unicast fibtype6iif "tvrf" "dead:1::1" || lret=1 + + # 10.9.9.2 should not provide a result for iif veth, but + # should when iif is tvrf. + # This is because its reachable via dummy0 which is part of + # tvrf. iif veth0 MUST conceal the dummy0 result (i.e. return oif 0). 
# Extends nsrouter config by adding dummy0+vrf.
#
#  10.0.1.99     10.0.1.1           10.0.2.1         10.0.2.99
# dead:1::99    dead:1::1          dead:2::1        dead:2::99
# ns1 <-------> [ veth0 ] nsrouter [veth1] <-------> ns2
#                         [dummy0]
#                         10.9.9.1
#                        dead:9::1
#                          [tvrf]
#
# Runs two rounds of reachability and fib-type checks: first with veth0
# outside of tvrf, then with veth0 enslaved to tvrf.
# Sets the global 'ret' to $ksft_skip if the dummy/vrf devices cannot be
# created and to 1 if one of the background unreachability checks fails.
test_fib_vrf()
{
	local pid_a pid_b

	if ! test_fib_vrf_dev_add_dummy; then
		[ $ret -eq 0 ] && ret=$ksft_skip
		return
	fi

	ip -net "$nsrouter" addr add "10.9.9.1"/24 dev dummy0
	ip -net "$nsrouter" addr add "dead:9::1"/64 dev dummy0 nodad

	ip -net "$nsrouter" route add default via 10.0.2.99
	ip -net "$nsrouter" route add default via dead:2::99

	load_ruleset_vrf || return

	# no echo reply for these addresses: The dummy interface is part of tvrf,
	# but veth0 (incoming interface) isn't linked to it.
	# The checks run in the background (i.e. in subshells), so their verdict
	# is only visible through the exit status collected by 'wait' below.
	test_ping_unreachable "10.9.9.1" "dead:9::1" &
	pid_a=$!
	test_ping_unreachable "10.9.9.2" "dead:9::2" &
	pid_b=$!

	# expect replies from these.
	test_ping "10.0.1.1" "dead:1::1"
	test_ping "10.0.2.1" "dead:2::1"
	test_ping "10.0.2.99" "dead:2::99"

	# NOTE(review): assumes test_ping_unreachable returns non-zero when the
	# address was reachable -- confirm against its definition.
	wait "$pid_a" || ret=1
	wait "$pid_b" || ret=1

	check_fib_vrf_type "fib expression address types match (iif not in vrf)"

	# second round: this time, make veth0 (rx interface) part of the vrf.
	# 10.9.9.1 / dead:9::1 become reachable from ns1, while ns2
	# becomes unreachable.
	ip -net "$nsrouter" link set veth0 master tvrf
	ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad

	# this reload should not be needed, but in case
	# there is some error (missing or unexpected entry) this will prevent them
	# from leaking into round 2.
	load_ruleset_vrf || return

	test_ping "10.0.1.1" "dead:1::1"
	test_ping "10.9.9.1" "dead:9::1"

	# ns2 should no longer be reachable (veth1 not in vrf)
	test_ping_unreachable "10.0.2.99" "dead:2::99" &
	pid_a=$!

	# vrf via dummy0, but host doesn't exist
	test_ping_unreachable "10.9.9.2" "dead:9::2" &
	pid_b=$!

	wait "$pid_a" || ret=1
	wait "$pid_b" || ret=1

	check_fib_veth_vrf_type "fib expression address types match (iif in vrf)"
}
load_pbr_ruleset "$nsrouter";then echo "SKIP: Could not load fib forward ruleset" - exit $ksft_skip + [ "$ret" -eq 0 ] && ret=$ksft_skip fi ip -net "$nsrouter" rule add from all table 128 @@ -224,11 +815,36 @@ ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1 # drop main ipv4 table ip -net "$nsrouter" -4 rule delete table main -if ! test_ping 10.0.2.99 dead:2::99;then - ip -net "$nsrouter" nft list ruleset - echo "FAIL: fib mismatch in pbr setup" - exit 1 +if test_ping 10.0.2.99 dead:2::99;then + echo "PASS: fib expression forward check with policy based routing" +else + echo "FAIL: fib expression forward check with policy based routing" + ret=1 fi -echo "PASS: fib expression forward check with policy based routing" -exit 0 +test_fib_type "policy routing" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +# Un-do policy routing changes +ip -net "$nsrouter" rule del from all table 128 +ip -net "$nsrouter" rule del from all iif veth0 table 129 + +ip -net "$nsrouter" route del table 128 to 10.0.1.0/24 dev veth0 +ip -net "$nsrouter" route del table 129 to 10.0.2.0/24 dev veth1 + +ip -net "$ns1" -4 route del default +ip -net "$ns1" -6 route del default + +ip -net "$ns1" -4 route add default via 10.0.1.1 +ip -net "$ns1" -6 route add default via dead:1::1 + +ip -net "$nsrouter" -4 rule add from all table main priority 32766 + +test_fib_type "default table" +ip netns exec "$nsrouter" nft delete table ip filter +ip netns exec "$nsrouter" nft delete table ip6 filter + +test_fib_vrf + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh index b3995550856a..a68bc882fa4e 100755 --- a/tools/testing/selftests/net/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh @@ -20,6 +20,7 @@ ret=0 SOCAT_TIMEOUT=60 nsin="" +nsin_small="" ns1out="" ns2out="" @@ -36,7 +37,7 @@ cleanup() { 
cleanup_all_ns - rm -f "$nsin" "$ns1out" "$ns2out" + rm -f "$nsin" "$nsin_small" "$ns1out" "$ns2out" [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns" } @@ -71,6 +72,9 @@ omtu=9000 lmtu=1500 rmtu=2000 +filesize=$((2 * 1024 * 1024)) +filesize_small=$((filesize / 16)) + usage(){ echo "nft_flowtable.sh [OPTIONS]" echo @@ -81,12 +85,16 @@ usage(){ exit 1 } -while getopts "o:l:r:" o +while getopts "o:l:r:s:" o do case $o in o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; + s) + filesize=$OPTARG + filesize_small=$((OPTARG / 16)) + ;; *) usage;; esac done @@ -119,6 +127,8 @@ ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0 ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad +ip netns exec "$nsr1" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsr2" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null for i in 0 1; do ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null @@ -145,7 +155,9 @@ ip -net "$ns1" route add default via dead:1::1 ip -net "$ns2" route add default via dead:2::1 ip -net "$nsr1" route add default via 192.168.10.2 +ip -6 -net "$nsr1" route add default via fee1:2::2 ip -net "$nsr2" route add default via 192.168.10.1 +ip -6 -net "$nsr2" route add default via fee1:2::1 ip netns exec "$nsr1" nft -f - <<EOF table inet filter { @@ -212,23 +224,16 @@ if ! 
# Create (or overwrite) a file filled with random data.
#
# $1: path of the file to write
# $2: number of bytes to read from /dev/urandom
make_file()
{
	# keep the helper variables out of the global namespace, like the
	# other helpers in this script do.
	local name="$1"
	local sz="$2"

	head -c "$sz" < /dev/urandom > "$name"
}
# Transfer a file in both directions over one TCP connection between two
# namespaces and verify that both sides received an identical copy.
#
# $1: namespace running the socat client
# $2: namespace running the socat listener (port 12345)
# $3: pmtu flag; 0 selects the small input file, anything else the large one
# $4: IP version handed to socat (4 or 6)
# $5: destination address the client connects to
# $6: destination port the client connects to
#
# Returns 0 on success, 1 if socat failed or a received file differs.
# A file mismatch additionally sets the global 'ret' to 1.
test_tcp_forwarding_ip()
{
	local nsa=$1
	local nsb=$2
	local pmtu=$3
	local proto=$4
	local dstip=$5
	local dstport=$6
	local lret=0
	local socatc
	local socatl
	local infile="$nsin"

	if [ "$pmtu" -eq 0 ]; then
		infile="$nsin_small"
	fi

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -${proto} \
		TCP"${proto}"-LISTEN:12345,reuseaddr STDIO < "$infile" > "$ns2out" &
	lpid=$!

	busywait 1000 listener_ready

	timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -${proto} \
		TCP"${proto}":"$dstip":"$dstport" STDIO < "$infile" > "$ns1out"
	socatc=$?

	wait $lpid
	socatl=$?

	# A non-zero exit of either socat instance (including a timeout) must
	# be reflected in the return value; it used to be stored in a variable
	# that nothing ever read.
	if [ $socatl -ne 0 ] || [ $socatc -ne 0 ];then
		echo "FAIL: socat exited with error (listener $socatl, client $socatc)" 1>&2
		lret=1
	fi

	if ! check_transfer "$infile" "$ns2out" "ns1 -> ns2"; then
		lret=1
		ret=1
	fi

	if ! check_transfer "$infile" "$ns1out" "ns1 <- ns2"; then
		lret=1
		ret=1
	fi

	return $lret
}
# Run the TCP transfer test twice through the NAT setup: once towards
# 10.0.2.99:12345 (reported as "masquerade") and once towards 10.6.6.6:1666
# (reported as "dnat").
#
# $1: client namespace
# $2: listener namespace
# $3: pmtu flag; with 1 the flowtable counters are validated via
#     check_counters, otherwise a plain PASS line is printed
# $4: free-form description appended to the PASS/FAIL messages
#
# Returns the status of the last transfer that was attempted.
test_tcp_forwarding_nat()
{
	local nsa="$1"
	local nsb="$2"
	local pmtu="$3"
	local what="$4"
	local lret

	[ "$pmtu" -eq 0 ] && what="$what (pmtu disabled)"

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.0.2.99 12345
	lret=$?

	if [ "$lret" -ne 0 ] ; then
		# it is the masquerade transfer that failed; the dnat
		# transfer is not attempted in this case.
		echo "FAIL: flow offload for ns1/ns2 with masquerade $what"
		return $lret
	fi

	if [ "$pmtu" -eq 1 ] ;then
		check_counters "flow offload for ns1/ns2 with masquerade $what"
	else
		echo "PASS: flow offload for ns1/ns2 with masquerade $what"
	fi

	test_tcp_forwarding_ip "$nsa" "$nsb" "$pmtu" 4 10.6.6.6 1666
	lret=$?

	if [ "$lret" -ne 0 ] ; then
		# do not let check_counters report a PASS for a transfer
		# that did not complete.
		echo "FAIL: flow offload for ns1/ns2 with dnat $what"
	elif [ "$pmtu" -eq 1 ] ;then
		check_counters "flow offload for ns1/ns2 with dnat $what"
	else
		echo "PASS: flow offload for ns1/ns2 with dnat $what"
	fi

	return $lret
}
# IPIP tunnel test:
# Add IPIP tunnel interfaces and check flowtable acceleration.
#
# Two rounds are run: a plain IPIP tunnel (tun0) between nsr1 and nsr2, then
# a second tunnel (tun1) whose endpoints live on vlan 10 devices.
# If the first tunnel device cannot be created the test is skipped and the
# global 'ret' is set to $ksft_skip unless a failure was already recorded.
test_ipip() {
	# --- round 1: tun0 directly between the two routers ---
	if ! ip -net "$nsr1" link add name tun0 type ipip \
	     local 192.168.10.1 remote 192.168.10.2 >/dev/null; then
		echo "SKIP: could not add ipip tunnel"
		[ "$ret" -eq 0 ] && ret=$ksft_skip
		return
	fi
	ip -net "$nsr1" link set tun0 up
	ip -net "$nsr1" addr add 192.168.100.1/24 dev tun0
	ip netns exec "$nsr1" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null

	ip -net "$nsr2" link add name tun0 type ipip local 192.168.10.2 remote 192.168.10.1
	ip -net "$nsr2" link set tun0 up
	ip -net "$nsr2" addr add 192.168.100.2/24 dev tun0
	ip netns exec "$nsr2" sysctl net.ipv4.conf.tun0.forwarding=1 > /dev/null

	# send the routers' default routes through the tunnel.
	ip -net "$nsr1" route change default via 192.168.100.2
	ip -net "$nsr2" route change default via 192.168.100.1
	ip -net "$ns2" route add default via 10.0.2.1

	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun0 accept'
	ip netns exec "$nsr1" nft -a insert rule inet filter forward \
		'meta oif "veth0" tcp sport 12345 ct mark set 1 flow add @f1 counter name routed_repl accept'

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel"; then
		echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# --- round 2: tun1 on top of vlan 10 ---
	# Create vlan tagged devices for IPIP traffic.
	ip -net "$nsr1" link add link veth1 name veth1.10 type vlan id 10
	ip -net "$nsr1" link set veth1.10 up
	ip -net "$nsr1" addr add 192.168.20.1/24 dev veth1.10
	# 'veth1/10' addresses the sysctl directory of the veth1.10 device
	# created above ('.' already separates the sysctl path components).
	ip netns exec "$nsr1" sysctl net.ipv4.conf.veth1/10.forwarding=1 > /dev/null
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif veth1.10 accept'
	ip -net "$nsr1" link add name tun1 type ipip local 192.168.20.1 remote 192.168.20.2
	ip -net "$nsr1" link set tun1 up
	ip -net "$nsr1" addr add 192.168.200.1/24 dev tun1
	ip -net "$nsr1" route change default via 192.168.200.2
	ip netns exec "$nsr1" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null
	ip netns exec "$nsr1" nft -a insert rule inet filter forward 'meta oif tun1 accept'

	ip -net "$nsr2" link add link veth0 name veth0.10 type vlan id 10
	ip -net "$nsr2" link set veth0.10 up
	ip -net "$nsr2" addr add 192.168.20.2/24 dev veth0.10
	ip netns exec "$nsr2" sysctl net.ipv4.conf.veth0/10.forwarding=1 > /dev/null
	ip -net "$nsr2" link add name tun1 type ipip local 192.168.20.2 remote 192.168.20.1
	ip -net "$nsr2" link set tun1 up
	ip -net "$nsr2" addr add 192.168.200.2/24 dev tun1
	ip -net "$nsr2" route change default via 192.168.200.1
	ip netns exec "$nsr2" sysctl net.ipv4.conf.tun1.forwarding=1 > /dev/null

	if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "IPIP tunnel over vlan"; then
		echo "FAIL: flow offload for ns1/ns2 with IPIP tunnel over vlan" 1>&2
		ip netns exec "$nsr1" nft list ruleset
		ret=1
	fi

	# Restore the previous configuration
	ip -net "$nsr1" route change default via 192.168.10.2
	ip -net "$nsr2" route change default via 192.168.10.1
	ip -net "$ns2" route del default via 10.0.2.1
}
test_bridge() { @@ -609,6 +731,8 @@ ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad ip -net "$nsr1" link set up dev veth0 } +test_ipip + test_bridge KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1) @@ -649,7 +773,7 @@ ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1 ip -net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 -if test_tcp_forwarding "$ns1" "$ns2"; then +if test_tcp_forwarding "$ns1" "$ns2" 1 4 10.0.2.99 12345; then check_counters "ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" @@ -657,6 +781,14 @@ else ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 fi +if test_tcp_forwarding "$ns1" "$ns2" 1 6 "[dead:2::99]" 12345; then + check_counters "IPv6 ipsec tunnel mode for ns1/ns2" +else + echo "FAIL: IPv6 ipsec tunnel mode for ns1/ns2" + ip netns exec "$nsr1" nft list ruleset 1>&2 + ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2 +fi + if [ "$1" = "" ]; then low=1280 mtu=$((65536 - low)) @@ -664,8 +796,16 @@ if [ "$1" = "" ]; then l=$(((RANDOM%mtu) + low)) r=$(((RANDOM%mtu) + low)) - echo "re-run with random mtus: -o $o -l $l -r $r" - $0 -o "$o" -l "$l" -r "$r" + MINSIZE=$((2 * 1000 * 1000)) + MAXSIZE=$((64 * 1000 * 1000)) + + filesize=$(((RANDOM * RANDOM) % MAXSIZE)) + if [ "$filesize" -lt "$MINSIZE" ]; then + filesize=$((filesize+MINSIZE)) + fi + + echo "re-run with random mtus and file size: -o $o -l $l -r $r -s $filesize" + $0 -o "$o" -l "$l" -r "$r" -s "$filesize" || ret=1 fi exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_interface_stress.sh b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh new file mode 100755 index 000000000000..c0fffaa6dbd9 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_interface_stress.sh @@ -0,0 +1,157 @@ +#!/bin/bash -e +# +# SPDX-License-Identifier: GPL-2.0 +# +# Torture nftables' netdevice notifier callbacks and related code by frequent +# renaming of interfaces which netdev-family chains and flowtables 
hook into. + +source lib.sh + +checktool "nft --version" "run test without nft tool" +checktool "iperf3 --version" "run test without iperf3 tool" + +read kernel_tainted < /proc/sys/kernel/tainted + +# how many seconds to torture the kernel? +# default to 80% of max run time but don't exceed 48s +TEST_RUNTIME=$((${kselftest_timeout:-60} * 8 / 10)) +[[ $TEST_RUNTIME -gt 48 ]] && TEST_RUNTIME=48 + +trap "cleanup_all_ns" EXIT + +setup_ns nsc nsr nss + +ip -net $nsc link add cr0 type veth peer name rc0 netns $nsr +ip -net $nsc addr add 10.0.0.1/24 dev cr0 +ip -net $nsc link set cr0 up +ip -net $nsc route add default via 10.0.0.2 + +ip -net $nss link add sr0 type veth peer name rs0 netns $nsr +ip -net $nss addr add 10.1.0.1/24 dev sr0 +ip -net $nss link set sr0 up +ip -net $nss route add default via 10.1.0.2 + +ip -net $nsr addr add 10.0.0.2/24 dev rc0 +ip -net $nsr link set rc0 up +ip -net $nsr addr add 10.1.0.2/24 dev rs0 +ip -net $nsr link set rs0 up +ip netns exec $nsr sysctl -q net.ipv4.ip_forward=1 +ip netns exec $nsr sysctl -q net.ipv4.conf.all.forwarding=1 + +{ + echo "table netdev t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + chain chain_rc$i { + type filter hook ingress device rc$i priority 0 + counter + } + chain chain_rs$i { + type filter hook ingress device rs$i priority 0 + counter + } + EOF + done + echo "}" + echo "table ip t {" + for ((i = 0; i < 10; i++)); do + cat <<-EOF + flowtable ft_${i} { + hook ingress priority 0 + devices = { rc$i, rs$i } + } + EOF + done + echo "chain c {" + echo "type filter hook forward priority 0" + for ((i = 0; i < 10; i++)); do + echo -n "iifname rc$i oifname rs$i " + echo "ip protocol tcp counter flow add @ft_${i}" + done + echo "counter" + echo "}" + echo "}" +} | ip netns exec $nsr nft -f - || { + echo "SKIP: Could not load nft ruleset" + exit $ksft_skip +} + +for ((o=0, n=1; ; o=n, n++, n %= 10)); do + ip -net $nsr link set rc$o name rc$n + ip -net $nsr link set rs$o name rs$n +done & +rename_loop_pid=$! 
+ +while true; do ip netns exec $nsr nft list ruleset >/dev/null 2>&1; done & +nft_list_pid=$! + +ip netns exec $nsr nft monitor >/dev/null & +nft_monitor_pid=$! + +ip netns exec $nss iperf3 --server --daemon -1 +summary_expr='s,^\[SUM\] .* \([0-9\.]\+\) Kbits/sec .* receiver,\1,p' +rate=$(ip netns exec $nsc iperf3 \ + --format k -c 10.1.0.1 --time $TEST_RUNTIME \ + --length 56 --parallel 10 -i 0 | sed -n "$summary_expr") + +kill $nft_list_pid +kill $nft_monitor_pid +kill $rename_loop_pid +wait + +wildcard_prep() { + ip netns exec $nsr nft -f - <<EOF +table ip t { + flowtable ft_wild { + hook ingress priority 0 + devices = { wild* } + } +} +EOF +} + +if ! wildcard_prep; then + echo "SKIP wildcard tests: not supported by host's nft?" +else + for ((i = 0; i < 100; i++)); do + ip -net $nsr link add wild$i type dummy & + done + wait + for ((i = 80; i < 100; i++)); do + ip -net $nsr link del wild$i & + done + for ((i = 0; i < 80; i++)); do + ip -net $nsr link del wild$i & + done + wait + for ((i = 0; i < 100; i += 10)); do + ( + for ((j = 0; j < 10; j++)); do + ip -net $nsr link add wild$((i + j)) type dummy + done + for ((j = 0; j < 10; j++)); do + ip -net $nsr link del wild$((i + j)) + done + ) & + done + wait +fi + + +[[ $kernel_tainted -eq 0 && $(</proc/sys/kernel/tainted) -ne 0 ]] && { + echo "FAIL: Kernel is tainted!" + exit $ksft_fail +} + +[[ $rate -gt 0 ]] || { + echo "FAIL: Zero throughput in iperf3" + exit $ksft_fail +} + +[[ -f /sys/kernel/debug/kmemleak && \ + -n $(</sys/kernel/debug/kmemleak) ]] && { + echo "FAIL: non-empty kmemleak report" + exit $ksft_fail +} + +exit $ksft_pass diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh index 9e39de26455f..b3ec2d0a3f56 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat.sh @@ -569,7 +569,7 @@ test_redirect6() ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null if ! 
# Compare an input file with the file produced by a transfer.
#
# $1: input file
# $2: output file
# $3: short description of the check, used in the error message
#
# On mismatch both files are listed, an error is printed to stderr, the
# global 'ret' is set to 1 and 1 is returned.  Returns 0 if the files are
# identical.
file_cmp()
{
	local infile="$1"
	local outfile="$2"
	# callers pass the description as third argument; without capturing
	# it the error message would end in an empty/stale $msg.
	local msg="$3"

	if ! cmp "$infile" "$outfile";then
		echo -n "Infile "
		ls -l "$infile"
		echo -n "Outfile "
		ls -l "$outfile"
		echo "ERROR: in and output file mismatch when checking $msg" 1>&2
		ret=1
		return 1
	fi

	return 0
}
+ + busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1234 "-u" + + result=$(ip netns exec "$ns1" timeout 3 socat STDIO UDP4-SENDTO:"$udpdaddr:1234,sourceport=4321" < "$INFILE") + udpdaddr="10.0.1.1" + + if [ "$result" != "PONG 10.0.1.1 step $i" ] ; then + echo "ERROR: failed to test udp $ns1 to $ns2 with dnat rule step $i, result: \"$result\"" 1>&2 + lret=1 + ret=1 + fi + + wait + + file_cmp "$INFILE" "$OUTFILE" "udp dnat step $i" || lret=1 + + :> "$OUTFILE" + done + + test $lret -eq 0 && echo "PASS: IP dnat clash $ns1:$ns2" + + ip netns exec "$ns0" nft flush ruleset + + return $lret +} + # ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 for i in "$ns0" "$ns1" "$ns2" ;do ip netns exec "$i" nft -f /dev/stdin <<EOF @@ -1147,6 +1217,7 @@ $test_inet_nat && test_redirect6 inet test_port_shadowing test_stateless_nat_ip +test_dnat_clash if [ $ret -ne 0 ];then echo -n "FAIL: " diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh index 3b81d88bdde3..9f200f80253a 100755 --- a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh +++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh @@ -88,7 +88,6 @@ for i in $(seq 1 "$maxclients");do echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2 echo netns exec "$gw" ip link set "veth$i" up echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2 - echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0 # clients have same IP addresses. echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0 @@ -178,7 +177,6 @@ fi ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null -ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null # useful for debugging: allows to use 'ping' from clients to gateway. 
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index c61d23a8c88d..6136ceec45e0 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -8,7 +8,9 @@ source lib.sh ret=0 -timeout=2 +timeout=5 + +SCTP_TEST_TIMEOUT=60 cleanup() { @@ -25,10 +27,13 @@ cleanup() } checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +modprobe -q sctp trap cleanup EXIT -setup_ns ns1 ns2 nsrouter +setup_ns ns1 ns2 ns3 nsrouter TMPFILE0=$(mktemp) TMPFILE1=$(mktemp) @@ -36,13 +41,16 @@ TMPFILE2=$(mktemp) TMPFILE3=$(mktemp) TMPINPUT=$(mktemp) -dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" +COUNT=200 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && COUNT=$((COUNT/8)) +dd conv=sparse status=none if=/dev/zero bs=1M count=$COUNT of="$TMPINPUT" if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then echo "SKIP: No virtual ethernet pair device support in kernel" exit $ksft_skip fi ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" ip -net "$nsrouter" link set veth0 up ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 @@ -52,8 +60,13 @@ ip -net "$nsrouter" link set veth1 up ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + ip -net "$ns1" link set eth0 up ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad @@ -65,6 +78,11 @@ ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad ip 
-net "$ns2" route add default via 10.0.2.1 ip -net "$ns2" route add default via dead:2::1 +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + load_ruleset() { local name=$1 local prio=$2 @@ -250,45 +268,52 @@ listener_ready() test_tcp_forward() { - ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 2 & local nfqpid=$! timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 2 + + local tthen=$(date +%s) ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null - wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain" + wait_and_check_retval "$rpid" "tcp and nfqueue in forward chain" "$tthen" + kill "$nfqpid" } test_tcp_localhost() { - dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT" timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null & local rpid=$! - ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 & local nfqpid=$! + local tthen=$(date +%s) busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3 ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null - wait "$rpid" && echo "PASS: tcp via loopback" - wait 2>/dev/null + wait_and_check_retval "$rpid" "tcp via loopback" "$tthen" + kill "$nfqpid" } test_tcp_localhost_connectclose() { - ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" & - ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" & + ip netns exec "$nsrouter" ./nf_queue -q 3 & + local nfqpid=$! 
# Wait for a background process and print a PASS/FAIL line for it.
#
# $1: pid of the background process to wait for
# $2: test description printed after the verdict
# $3: start time in seconds since the epoch (date +%s)
#
# Sets the global 'ret' to 1 if the process exited with non-zero status.
wait_and_check_retval()
{
	local rpid="$1"
	local msg="$2"
	local tthen="$3"
	local verdict="PASS"
	local tnow

	if ! wait "$rpid";then
		verdict="FAIL"
		ret=1
	fi

	# take the end time only after the process has finished, so that the
	# reported duration includes the time spent waiting for it.
	tnow=$(date +%s)

	printf "%s: %s (duration: %ds)\n" "$verdict" "$msg" $((tnow-tthen))
}
+ + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$nsrouter" ./nf_queue -q 10 -G & + local nfqpid=$! + local tthen=$(date +%s) + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$nsrouter" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + wait_and_check_retval "$rpid" "sctp and nfqueue in forward chain" "$tthen" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp forward" +} + +test_sctp_output() +{ + ip netns exec "$ns1" nft -f /dev/stdin <<EOF +table inet sctpq { + chain output { + type filter hook output priority 0; policy accept; + sctp dport 12345 queue num 11 + } +} +EOF + # reduce test file size, software segmentation causes sk wmem increase. + dd conv=sparse status=none if=/dev/zero bs=1M count=$((COUNT/2)) of="$TMPINPUT" + + timeout "$SCTP_TEST_TIMEOUT" ip netns exec "$ns2" socat -u SCTP-LISTEN:12345 STDOUT > "$TMPFILE1" & + local rpid=$! + + busywait "$BUSYWAIT_TIMEOUT" sctp_listener_ready "$ns2" + + ip netns exec "$ns1" ./nf_queue -q 11 & + local nfqpid=$! + local tthen=$(date +%s) + + ip netns exec "$ns1" socat -u STDIN SCTP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null + + if ! ip netns exec "$ns1" nft delete table inet sctpq; then + echo "FAIL: Could not delete sctpq table" + exit 1 + fi + + # must wait before checking completeness of output file. 
+ wait_and_check_retval "$rpid" "sctp and nfqueue in output chain with GSO" "$tthen" + kill "$nfqpid" + + check_output_files "$TMPINPUT" "$TMPFILE1" "sctp output" +} + +udp_listener_ready() +{ + ss -S -N "$1" -uln -o "sport = :12345" | grep -q 12345 +} + +output_files_written() +{ + test -s "$1" && test -s "$2" +} + +test_udp_ct_race() +{ + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet udpq { + chain prerouting { + type nat hook prerouting priority dstnat - 5; policy accept; + ip daddr 10.6.6.6 udp dport 12345 counter dnat to numgen inc mod 2 map { 0 : 10.0.2.99, 1 : 10.0.3.99 } + } + chain postrouting { + type filter hook postrouting priority srcnat - 5; policy accept; + udp dport 12345 counter queue num 12 + } +} +EOF + :> "$TMPFILE1" + :> "$TMPFILE2" + + timeout 10 ip netns exec "$ns2" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE1",trunc & + local rpid1=$! + + timeout 10 ip netns exec "$ns3" socat UDP-LISTEN:12345,fork,pf=ipv4 OPEN:"$TMPFILE2",trunc & + local rpid2=$! + + ip netns exec "$nsrouter" ./nf_queue -q 12 -d 1000 & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns2" + busywait "$BUSYWAIT_TIMEOUT" udp_listener_ready "$ns3" + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 12 + + # Send two packets, one should end up in ns2, the other in ns3. + # This is because nfqueue will delay the first packet for long enough + # that the second packet will not find an existing conntrack entry. + echo "Packet 1" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + echo "Packet 2" | ip netns exec "$ns1" socat -u STDIN UDP-DATAGRAM:10.6.6.6:12345,bind=0.0.0.0:55221 + + busywait 10000 output_files_written "$TMPFILE1" "$TMPFILE2" + + kill "$nfqpid" + + if ! 
ip netns exec "$nsrouter" bash -c 'conntrack -L -p udp --dport 12345 2>/dev/null | wc -l | grep -q "^1"'; then + echo "FAIL: Expected One udp conntrack entry" + ip netns exec "$nsrouter" conntrack -L -p udp --dport 12345 + ret=1 + fi + + if ! ip netns exec "$nsrouter" nft delete table inet udpq; then + echo "FAIL: Could not delete udpq table" + ret=1 + return + fi + + NUMLINES1=$(wc -l < "$TMPFILE1") + NUMLINES2=$(wc -l < "$TMPFILE2") + + if [ "$NUMLINES1" -ne 1 ] || [ "$NUMLINES2" -ne 1 ]; then + ret=1 + echo "FAIL: uneven udp packet distribution: $NUMLINES1 $NUMLINES2" + echo -n "$TMPFILE1: ";cat "$TMPFILE1" + echo -n "$TMPFILE2: ";cat "$TMPFILE2" + return + fi + + echo "PASS: both udp receivers got one packet each" } test_queue_removal() @@ -388,7 +598,7 @@ table ip filter { } } EOF - ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 & local nfqpid=$! busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 @@ -407,6 +617,7 @@ EOF echo "PASS: queue program exiting while packets queued" else echo "TAINT: queue program exiting while packets queued" + dmesg ret=1 fi } @@ -414,6 +625,7 @@ EOF ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null load_ruleset "filter" 0 @@ -443,11 +655,17 @@ test_queue 10 # same. We queue to a second program as well. 
load_ruleset "filter2" 20 test_queue 20 +ip netns exec "$ns1" nft flush ruleset test_tcp_forward test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue +test_sctp_forward +test_sctp_output +test_udp_ct_race + +# should be last, adds vrf device in ns1 and changes routes test_icmp_vrf test_queue_removal diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh new file mode 100755 index 000000000000..e208fb03eeb7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_tcp.sh @@ -0,0 +1,358 @@ +#!/bin/bash +# +# This tests tproxy on the following scenario: +# +# +------------+ +# +-------+ | nsrouter | +-------+ +# |ns1 |.99 .1| |.1 .99| ns2| +# | eth0|---------------|veth0 veth1|------------------|eth0 | +# | | 10.0.1.0/24 | | 10.0.2.0/24 | | +# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+ +# +------------+ +# |.1 +# | +# | +# | +-------+ +# | .99| ns3| +# +------------------------|eth0 | +# 10.0.3.0/24 | | +# dead:3::/64 +-------+ +# +# The tproxy implementation acts as an echo server so the client +# must receive the same message it sent if it has been proxied. +# If is not proxied the servers return PONG_NS# with the number +# of the namespace the server is running. +# +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +timeout=5 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$ns3" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT +setup_ns ns1 ns2 ns3 nsrouter + +if ! 
ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + if ! 
ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_tproxy() +{ + local traffic_origin="$1" + local ip_proto="$2" + local expect_ns1_ns2="$3" + local expect_ns1_ns3="$4" + local expect_nsrouter_ns2="$5" + local expect_nsrouter_ns3="$6" + + # derived variables + local testname="test_${ip_proto}_tcp_${traffic_origin}" + local socat_ipproto + local ns1_ip + local ns2_ip + local ns3_ip + local ns2_target + local ns3_target + local nftables_subject + local ip_command + + # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1) + case $ip_proto in + "ip") + socat_ipproto="-4" + ns1_ip=10.0.1.99 + ns2_ip=10.0.2.99 + ns3_ip=10.0.3.99 + ns2_target="tcp:$ns2_ip:8080" + ns3_target="tcp:$ns3_ip:8080" + nftables_subject="ip daddr $ns2_ip tcp dport 8080" + ip_command="ip" + ;; + "ip6") + socat_ipproto="-6" + ns1_ip=dead:1::99 + ns2_ip=dead:2::99 + ns3_ip=dead:3::99 + ns2_target="tcp:[$ns2_ip]:8080" + ns3_target="tcp:[$ns3_ip]:8080" + nftables_subject="ip6 daddr $ns2_ip tcp dport 8080" + ip_command="ip -6" + ;; + *) + echo "FAIL: unsupported protocol" + exit 255 + ;; + esac + + case $traffic_origin in + # to capture the local originated traffic we need to mark the outgoing + # traffic so the policy based routing rule redirects it and can be processed + # in the prerouting chain. 
+ "local") + nftables_rules=" +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept + } + chain output { + type route hook output priority 0; policy accept; + $nftables_subject meta mark set 1 accept + } +}" + ;; + "forward") + nftables_rules=" +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $nftables_subject tproxy $ip_proto to :12345 meta mark set 1 accept + } +}" + ;; + *) + echo "FAIL: unsupported parameter for traffic origin" + exit 255 + ;; + esac + + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route add local "${ns2_ip}" dev lo table 100 + echo "$nftables_rules" | ip netns exec "$nsrouter" nft -f /dev/stdin + + timeout "$timeout" ip netns exec "$nsrouter" socat "$socat_ipproto" tcp-listen:12345,fork,ip-transparent SYSTEM:"cat" 2>/dev/null & + local tproxy_pid=$! + + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + local server2_pid=$! + + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" tcp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + local server3_pid=$! 
+ + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-t" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-t" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-t" + + local result + # request from ns1 to ns2 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns2_target") + if [ "$result" == "$expect_ns1_ns2" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended" + ret=1 + fi + + # request from ns1 to ns3(forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO "$ns3_target") + if [ "$result" = "$expect_ns1_ns3" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended" + ret=1 + fi + + # request from nsrouter to ns2 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns2_target") + if [ "$result" == "$expect_nsrouter_ns2" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not \"$expect_nsrouter_ns2\" as intended" + ret=1 + fi + + # request from nsrouter to ns3 (localy originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO "$ns3_target") + if [ "$result" = "$expect_nsrouter_ns3" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended" + ret=1 + fi + + # cleanup + kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null + # 
shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route flush table 100 +} + + +test_ipv4_tcp_forward() +{ + local traffic_origin="forward" + local ip_proto="ip" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv4_tcp_local() +{ + local traffic_origin="local" + local ip_proto="ip" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="I_M_PROXIED" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv6_tcp_forward() +{ + local traffic_origin="forward" + local ip_proto="ip6" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +test_ipv6_tcp_local() +{ + local traffic_origin="local" + local ip_proto="ip6" + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="I_M_PROXIED" + local expect_nsrouter_ns3="PONG_NS3" + + test_tproxy "$traffic_origin" \ + "$ip_proto" \ + "$expect_ns1_ns2" \ + "$expect_ns1_ns3" \ + "$expect_nsrouter_ns2" \ + "$expect_nsrouter_ns3" +} + +if test_ping; then + # plain forwarding works before any tproxy rules are loaded + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $?" 1>&2 # $? is test_ping's status here + exit 1 # ret is still 0 at this point, so fail explicitly +fi + +test_ipv4_tcp_forward +test_ipv4_tcp_local +test_ipv6_tcp_forward 
+test_ipv6_tcp_local + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh new file mode 100755 index 000000000000..d16de13fe5a7 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/nft_tproxy_udp.sh @@ -0,0 +1,262 @@ +#!/bin/bash +# +# This tests tproxy on the following scenario: +# +# +------------+ +# +-------+ | nsrouter | +-------+ +# |ns1 |.99 .1| |.1 .99| ns2| +# | eth0|---------------|veth0 veth1|------------------|eth0 | +# | | 10.0.1.0/24 | | 10.0.2.0/24 | | +# +-------+ dead:1::/64 | veth2 | dead:2::/64 +-------+ +# +------------+ +# |.1 +# | +# | +# | +-------+ +# | .99| ns3| +# +------------------------|eth0 | +# 10.0.3.0/24 | | +# dead:3::/64 +-------+ +# +# The tproxy implementation acts as an echo server so the client +# must receive the same message it sent if it has been proxied. +# If is not proxied the servers return PONG_NS# with the number +# of the namespace the server is running. +# shellcheck disable=SC2162,SC2317 + +source lib.sh +ret=0 +# UDP is slow +timeout=15 + +cleanup() +{ + ip netns pids "$ns1" | xargs kill 2>/dev/null + ip netns pids "$ns2" | xargs kill 2>/dev/null + ip netns pids "$ns3" | xargs kill 2>/dev/null + ip netns pids "$nsrouter" | xargs kill 2>/dev/null + + cleanup_all_ns +} + +checktool "nft --version" "test without nft tool" +checktool "socat -h" "run test without socat" + +trap cleanup EXIT +setup_ns ns1 ns2 ns3 nsrouter + +if ! 
ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then + echo "SKIP: No virtual ethernet pair device support in kernel" + exit $ksft_skip +fi +ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2" +ip link add veth2 netns "$nsrouter" type veth peer name eth0 netns "$ns3" + +ip -net "$nsrouter" link set veth0 up +ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0 +ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad + +ip -net "$nsrouter" link set veth1 up +ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1 +ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad + +ip -net "$nsrouter" link set veth2 up +ip -net "$nsrouter" addr add 10.0.3.1/24 dev veth2 +ip -net "$nsrouter" addr add dead:3::1/64 dev veth2 nodad + +ip -net "$ns1" link set eth0 up +ip -net "$ns2" link set eth0 up +ip -net "$ns3" link set eth0 up + +ip -net "$ns1" addr add 10.0.1.99/24 dev eth0 +ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad +ip -net "$ns1" route add default via 10.0.1.1 +ip -net "$ns1" route add default via dead:1::1 + +ip -net "$ns2" addr add 10.0.2.99/24 dev eth0 +ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad +ip -net "$ns2" route add default via 10.0.2.1 +ip -net "$ns2" route add default via dead:2::1 + +ip -net "$ns3" addr add 10.0.3.99/24 dev eth0 +ip -net "$ns3" addr add dead:3::99/64 dev eth0 nodad +ip -net "$ns3" route add default via 10.0.3.1 +ip -net "$ns3" route add default via dead:3::1 + +ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth2.forwarding=1 > /dev/null + +test_ping() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then + return 2 + fi + + if ! 
ip netns exec "$ns1" ping -c 1 -q 10.0.3.99 > /dev/null; then + return 1 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:3::99 > /dev/null; then + return 2 + fi + + return 0 +} + +test_ping_router() { + if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then + return 3 + fi + + if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then + return 4 + fi + + return 0 +} + + +listener_ready() +{ + local ns="$1" + local port="$2" + local proto="$3" + ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port" +} + +test_tproxy_udp_forward() +{ + local ip_proto="$1" + + local expect_ns1_ns2="I_M_PROXIED" + local expect_ns1_ns3="PONG_NS3" + local expect_nsrouter_ns2="PONG_NS2" + local expect_nsrouter_ns3="PONG_NS3" + + # derived variables + local testname="test_${ip_proto}_udp_forward" + local socat_ipproto + local ns1_ip + local ns2_ip + local ns3_ip + local ns1_ip_port + local ns2_ip_port + local ns3_ip_port + local ip_command + + # socat 1.8.0 has a bug that requires to specify the IP family to bind (fixed in 1.8.0.1) + case $ip_proto in + "ip") + socat_ipproto="-4" + ns1_ip=10.0.1.99 + ns2_ip=10.0.2.99 + ns3_ip=10.0.3.99 + ns1_ip_port="$ns1_ip:18888" + ns2_ip_port="$ns2_ip:8080" + ns3_ip_port="$ns3_ip:8080" + ip_command="ip" + ;; + "ip6") + socat_ipproto="-6" + ns1_ip=dead:1::99 + ns2_ip=dead:2::99 + ns3_ip=dead:3::99 + ns1_ip_port="[$ns1_ip]:18888" + ns2_ip_port="[$ns2_ip]:8080" + ns3_ip_port="[$ns3_ip]:8080" + ip_command="ip -6" + ;; + *) + echo "FAIL: unsupported protocol" + exit 255 + ;; + esac + + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule add fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route add local "$ns2_ip" dev lo table 100 + ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain divert { + type filter hook prerouting priority 0; policy accept; + $ip_proto daddr $ns2_ip udp dport 8080 tproxy $ip_proto to :12345 meta 
mark set 1 accept + } +} +EOF + + timeout "$timeout" ip netns exec "$nsrouter" socat -u "$socat_ipproto" udp-listen:12345,fork,ip-transparent,reuseport udp:"$ns1_ip_port",ip-transparent,reuseport,bind="$ns2_ip_port" 2>/dev/null & + local tproxy_pid=$! + + timeout "$timeout" ip netns exec "$ns2" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS2" 2>/dev/null & + local server2_pid=$! + + timeout "$timeout" ip netns exec "$ns3" socat "$socat_ipproto" udp-listen:8080,fork SYSTEM:"echo PONG_NS3" 2>/dev/null & + local server3_pid=$! + + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter" 12345 "-u" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" 8080 "-u" + busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns3" 8080 "-u" + + local result + # request from ns1 to ns2 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port",sourceport=18888) + if [ "$result" == "$expect_ns1_ns2" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns2, not \"${expect_ns1_ns2}\" as intended" + ret=1 + fi + + # request from ns1 to ns3 (forwarded traffic) + result=$(echo I_M_PROXIED | ip netns exec "$ns1" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + if [ "$result" = "$expect_ns1_ns3" ] ;then + echo "PASS: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: ns1 got reply \"$result\" connecting to ns3, not \"$expect_ns1_ns3\" as intended" + ret=1 + fi + + # request from nsrouter to ns2 (locally originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns2_ip_port") + if [ "$result" == "$expect_nsrouter_ns2" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns2, not 
\"$expect_nsrouter_ns2\" as intended" + ret=1 + fi + + # request from nsrouter to ns3 (locally originated traffic) + result=$(echo I_M_PROXIED | ip netns exec "$nsrouter" socat -t 2 -T 2 STDIO udp:"$ns3_ip_port") + if [ "$result" = "$expect_nsrouter_ns3" ] ;then + echo "PASS: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3" + else + echo "ERROR: tproxy test $testname: nsrouter got reply \"$result\" connecting to ns3, not \"$expect_nsrouter_ns3\" as intended" + ret=1 + fi + + # cleanup + kill "$tproxy_pid" "$server2_pid" "$server3_pid" 2>/dev/null + # shellcheck disable=SC2046 # Intended splitting of ip_command + ip netns exec "$nsrouter" $ip_command rule del fwmark 1 table 100 + ip netns exec "$nsrouter" $ip_command route flush table 100 +} + + +if test_ping; then + # plain forwarding works before any tproxy rules are loaded + echo "PASS: ${ns1} can reach ${ns2}" +else + echo "FAIL: ${ns1} cannot reach ${ns2}: $?" 1>&2 # $? is test_ping's status here + exit 1 # ret is still 0 at this point, so fail explicitly +fi + +test_tproxy_udp_forward "ip" +test_tproxy_udp_forward "ip6" + +exit $ret diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh index 4485fd7675ed..24ad41d526d9 100755 --- a/tools/testing/selftests/net/netfilter/rpath.sh +++ b/tools/testing/selftests/net/netfilter/rpath.sh @@ -1,8 +1,7 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# return code to signal skipped test -ksft_skip=4 +source lib.sh # search for legacy iptables (it uses the xtables extensions if iptables-legacy --version >/dev/null 2>&1; then @@ -32,17 +31,10 @@ if [ -z "$iptables$ip6tables$nft" ]; then exit $ksft_skip fi -sfx=$(mktemp -u "XXXXXXXX") -ns1="ns1-$sfx" -ns2="ns2-$sfx" -trap "ip netns del $ns1; ip netns del $ns2" EXIT - -# create two netns, disable rp_filter in ns2 and -# keep IPv6 address when moving into VRF -ip netns add "$ns1" -ip netns add "$ns2" -ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 -ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 
+trap cleanup_all_ns EXIT + +# create two netns, keep IPv6 address when moving into VRF +setup_ns ns1 ns2 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 # a standard connection between the netns, should not trigger rp filter @@ -61,9 +53,20 @@ ip -net "$ns2" a a 192.168.42.1/24 dev d0 ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad +# avoid neighbor lookups and enable martian IPv6 pings +ns2_hwaddr=$(ip -net "$ns2" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ns1_hwaddr=$(ip -net "$ns1" link show dev v0 | \ + sed -n 's, *link/ether \([^ ]*\) .*,\1,p') +ip -net "$ns1" neigh add fec0:42::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns1" neigh add fec0:23::1 lladdr "$ns2_hwaddr" nud permanent dev v0 +ip -net "$ns2" neigh add fec0:42::2 lladdr "$ns1_hwaddr" nud permanent dev d0 +ip -net "$ns2" neigh add fec0:23::2 lladdr "$ns1_hwaddr" nud permanent dev v0 + # firewall matches to test [ -n "$iptables" ] && { common='-t raw -A PREROUTING -s 192.168.0.0/16' + common+=' -p icmp --icmp-type echo-request' if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -72,6 +75,7 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad } [ -n "$ip6tables" ] && { common='-t raw -A PREROUTING -s fec0::/16' + common+=' -p icmpv6 --icmpv6-type echo-request' if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then echo "Cannot add rpfilter rule" exit $ksft_skip @@ -82,8 +86,10 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad table inet t { chain c { type filter hook prerouting priority raw; - ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter - ip6 saddr fec0::/16 fib saddr . iif oif exists counter + ip saddr 192.168.0.0/16 icmp type echo-request \ + fib saddr . iif oif exists counter + ip6 saddr fec0::/16 icmpv6 type echo-request \ + fib saddr . 
iif oif exists counter } } EOF diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c index 21bb1cfd8a85..b282d1785c9b 100644 --- a/tools/testing/selftests/net/netfilter/sctp_collision.c +++ b/tools/testing/selftests/net/netfilter/sctp_collision.c @@ -9,9 +9,10 @@ int main(int argc, char *argv[]) { struct sockaddr_in saddr = {}, daddr = {}; - int sd, ret, len = sizeof(daddr); + socklen_t len = sizeof(daddr); struct timeval tv = {25, 0}; char buf[] = "hello"; + int sd, ret; if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n", diff --git a/tools/testing/selftests/net/netfilter/udpclash.c b/tools/testing/selftests/net/netfilter/udpclash.c new file mode 100644 index 000000000000..79de163d61ab --- /dev/null +++ b/tools/testing/selftests/net/netfilter/udpclash.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Usage: ./udpclash <IP> <PORT> + * + * Emit THREAD_COUNT UDP packets sharing the same saddr:daddr pair. + * + * This mimics DNS resolver libraries that emit A and AAAA requests + * in parallel. 
+ * + * This exercises conntrack clash resolution logic added and later + * refined in + * + * 71d8c47fc653 ("netfilter: conntrack: introduce clash resolution on insertion race") + * ed07d9a021df ("netfilter: nf_conntrack: resolve clash for matching conntracks") + * 6a757c07e51f ("netfilter: conntrack: allow insertion of clashing entries") + */ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <sys/socket.h> +#include <pthread.h> + +#define THREAD_COUNT 128 + +struct thread_args { + const struct sockaddr_in *si_remote; + int sockfd; +}; + +static volatile int wait = 1; + +static void *thread_main(void *varg) +{ + const struct sockaddr_in *si_remote; + const struct thread_args *args = varg; + static const char msg[] = "foo"; + + si_remote = args->si_remote; + + while (wait == 1) + ; + + if (sendto(args->sockfd, msg, strlen(msg), MSG_NOSIGNAL, + (struct sockaddr *)si_remote, sizeof(*si_remote)) < 0) + exit(111); + + return varg; +} + +static int run_test(int fd, const struct sockaddr_in *si_remote) +{ + struct thread_args thread_args = { + .si_remote = si_remote, + .sockfd = fd, + }; + pthread_t *tid = calloc(THREAD_COUNT, sizeof(pthread_t)); + unsigned int repl_count = 0, timeout = 0; + int i; + + if (!tid) { + perror("calloc"); + return 1; + } + + for (i = 0; i < THREAD_COUNT; i++) { + int err = pthread_create(&tid[i], NULL, &thread_main, &thread_args); + + if (err != 0) { + fprintf(stderr, "pthread_create: %s\n", strerror(err)); /* error is returned, errno is not set */ + exit(1); + } + } + + wait = 0; + + for (i = 0; i < THREAD_COUNT; i++) + pthread_join(tid[i], NULL); + + while (repl_count < THREAD_COUNT) { + struct sockaddr_in si_repl; + socklen_t si_repl_len = sizeof(si_repl); + char repl[512]; + ssize_t ret; + + ret = recvfrom(fd, repl, sizeof(repl), MSG_NOSIGNAL, + (struct sockaddr *) &si_repl, &si_repl_len); + if (ret < 0) { + if (timeout++ > 5000) { + fputs("timed out while waiting for reply from thread\n", stderr); + break; + } + + /* give reply time to pass 
through the stack */ + usleep(1000); + continue; + } + + if (si_repl_len != sizeof(*si_remote)) { + fprintf(stderr, "warning: reply has unexpected repl_len %d vs %d\n", + (int)si_repl_len, (int)sizeof(si_repl)); + } else if (si_remote->sin_addr.s_addr != si_repl.sin_addr.s_addr || + si_remote->sin_port != si_repl.sin_port) { + char a[64], b[64]; + + inet_ntop(AF_INET, &si_remote->sin_addr, a, sizeof(a)); + inet_ntop(AF_INET, &si_repl.sin_addr, b, sizeof(b)); + + fprintf(stderr, "reply from wrong source: want %s:%d got %s:%d\n", + a, ntohs(si_remote->sin_port), b, ntohs(si_repl.sin_port)); + } + + repl_count++; + } + + printf("got %d of %d replies\n", repl_count, THREAD_COUNT); + + free(tid); + + return repl_count == THREAD_COUNT ? 0 : 1; +} + +int main(int argc, char *argv[]) +{ + struct sockaddr_in si_local = { + .sin_family = AF_INET, + }; + struct sockaddr_in si_remote = { + .sin_family = AF_INET, + }; + int fd, ret; + + if (argc < 3) { + fputs("Usage: udpclash <daddr> <dport>\n", stderr); + return 1; + } + + si_remote.sin_port = htons(atoi(argv[2])); + si_remote.sin_addr.s_addr = inet_addr(argv[1]); + + fd = socket(AF_INET, SOCK_DGRAM|SOCK_CLOEXEC|SOCK_NONBLOCK, IPPROTO_UDP); + if (fd < 0) { + perror("socket"); + return 1; + } + + if (bind(fd, (struct sockaddr *)&si_local, sizeof(si_local)) < 0) { + perror("bind"); + return 1; + } + + ret = run_test(fd, &si_remote); + + close(fd); + + return ret; +} diff --git a/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh new file mode 100755 index 000000000000..912cb9583af1 --- /dev/null +++ b/tools/testing/selftests/net/netfilter/vxlan_mtu_frag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +if ! 
modprobe -q -n br_netfilter 2>&1; then + echo "SKIP: Test needs br_netfilter kernel module" + exit $ksft_skip +fi + +cleanup() +{ + cleanup_all_ns +} + +trap cleanup EXIT + +setup_ns host vtep router + +create_topology() +{ + ip link add host-eth0 netns "$host" type veth peer name vtep-host netns "$vtep" + ip link add vtep-router netns "$vtep" type veth peer name router-vtep netns "$router" +} + +setup_host() +{ + # bring ports up + ip -n "$host" addr add 10.0.0.1/24 dev host-eth0 + ip -n "$host" link set host-eth0 up + + # Add VLAN 10,20 + for vid in 10 20; do + ip -n "$host" link add link host-eth0 name host-eth0.$vid type vlan id $vid + ip -n "$host" addr add 10.0.$vid.1/24 dev host-eth0.$vid + ip -n "$host" link set host-eth0.$vid up + done +} + +setup_vtep() +{ + # create bridge on vtep + ip -n "$vtep" link add name br0 type bridge + ip -n "$vtep" link set br0 type bridge vlan_filtering 1 + + # VLAN 10 is untagged PVID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 10 pvid untagged + + # VLAN 20 as other VID + ip -n "$vtep" link set dev vtep-host master br0 + bridge -n "$vtep" vlan add dev vtep-host vid 20 + + # single-vxlan device on vtep + ip -n "$vtep" address add dev vtep-router 60.0.0.1/24 + ip -n "$vtep" link add dev vxd type vxlan external \ + vnifilter local 60.0.0.1 remote 60.0.0.2 dstport 4789 ttl 64 + ip -n "$vtep" link set vxd master br0 + + # Add VLAN-VNI 1-1 mappings + bridge -n "$vtep" link set dev vxd vlan_tunnel on + for vid in 10 20; do + bridge -n "$vtep" vlan add dev vxd vid $vid + bridge -n "$vtep" vlan add dev vxd vid $vid tunnel_info id $vid + bridge -n "$vtep" vni add dev vxd vni $vid + done + + # bring ports up + ip -n "$vtep" link set vxd up + ip -n "$vtep" link set vtep-router up + ip -n "$vtep" link set vtep-host up + ip -n "$vtep" link set dev br0 up +} + +setup_router() +{ + # bring ports up + ip -n "$router" link set router-vtep up +} + +setup() +{ + modprobe -q br_netfilter + 
create_topology + setup_host + setup_vtep + setup_router +} + +test_large_mtu_untagged_traffic() +{ + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.0.2 lladdr ca:fe:ba:be:00:01 dev host-eth0 + ip netns exec "$host" \ + ping -q 10.0.0.2 -I host-eth0 -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + return 0 +} + +test_large_mtu_tagged_traffic() +{ + for vid in 10 20; do + ip -n "$vtep" link set vxd mtu 1000 + ip -n "$host" neigh add 10.0.$vid.2 lladdr ca:fe:ba:be:00:01 dev host-eth0.$vid + ip netns exec "$host" \ + ping -q 10.0.$vid.2 -I host-eth0.$vid -c 1 -W 0.5 -s2000 > /dev/null 2>&1 + done + return 0 +} + +do_test() +{ + # Frames will be dropped so ping will not succeed + # If it doesn't panic, it passes + test_large_mtu_tagged_traffic + test_large_mtu_untagged_traffic +} + +setup && \ +echo "Test for VxLAN fragmentation with large MTU in br_netfilter:" && \ +do_test && echo "PASS!" +exit $? diff --git a/tools/testing/selftests/net/netlink-dumps.c b/tools/testing/selftests/net/netlink-dumps.c new file mode 100644 index 000000000000..51129c564d0a --- /dev/null +++ b/tools/testing/selftests/net/netlink-dumps.c @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE + +#include <fcntl.h> +#include <stdio.h> +#include <string.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> + +#include <linux/genetlink.h> +#include <linux/neighbour.h> +#include <linux/netdevice.h> +#include <linux/netlink.h> +#include <linux/mqueue.h> +#include <linux/rtnetlink.h> + +#include "kselftest_harness.h" + +#include <ynl.h> + +struct ext_ack { + int err; + + __u32 attr_offs; + __u32 miss_type; + __u32 miss_nest; + const char *str; +}; + +enum get_ea_ret { + ERROR = -1, + NO_CTRL = 0, + FOUND_DONE, + FOUND_ERR, + FOUND_EXTACK, +}; + +static enum get_ea_ret +nl_get_extack(char *buf, size_t n, struct ext_ack *ea) +{ + enum get_ea_ret ret = NO_CTRL; + const struct nlmsghdr *nlh; + const 
struct nlattr *attr; + ssize_t rem; + + for (rem = n; rem > 0; NLMSG_NEXT(nlh, rem)) { + nlh = (struct nlmsghdr *)&buf[n - rem]; + if (!NLMSG_OK(nlh, rem)) + return ERROR; + + if (nlh->nlmsg_type == NLMSG_ERROR) + ret = FOUND_ERR; + else if (nlh->nlmsg_type == NLMSG_DONE) + ret = FOUND_DONE; + else + continue; + + ea->err = -*(int *)NLMSG_DATA(nlh); + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return ret; + + ynl_attr_for_each(attr, nlh, sizeof(int)) { + switch (ynl_attr_type(attr)) { + case NLMSGERR_ATTR_OFFS: + ea->attr_offs = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_TYPE: + ea->miss_type = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MISS_NEST: + ea->miss_nest = ynl_attr_get_u32(attr); + break; + case NLMSGERR_ATTR_MSG: + ea->str = ynl_attr_get_str(attr); + break; + } + } + + return FOUND_EXTACK; + } + + return ret; +} + +static const struct { + struct nlmsghdr nlhdr; + struct ndmsg ndm; + struct nlattr ahdr; + __u32 val; +} dump_neigh_bad = { + .nlhdr = { + .nlmsg_len = sizeof(dump_neigh_bad), + .nlmsg_type = RTM_GETNEIGH, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .ndm = { + .ndm_family = 123, + }, + .ahdr = { + .nla_len = 4 + 4, + .nla_type = NDA_FLAGS_EXT, + }, + .val = -1, // should fail MASK validation +}; + +TEST(dump_extack) +{ + int netlink_sock; + int i, cnt, ret; + char buf[8192]; + int one = 1; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + ASSERT_GE(netlink_sock, 0); + + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_CAP_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_EXT_ACK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + n = setsockopt(netlink_sock, SOL_NETLINK, NETLINK_GET_STRICT_CHK, + &one, sizeof(one)); + ASSERT_EQ(n, 0); + + /* Dump so many times we fill up the buffer */ + cnt = 80; + for (i = 0; i < cnt; i++) { + n = send(netlink_sock, &dump_neigh_bad, + sizeof(dump_neigh_bad), 0); + 
ASSERT_EQ(n, sizeof(dump_neigh_bad)); + } + + /* Read out the ENOBUFS */ + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + EXPECT_EQ(n, -1); + EXPECT_EQ(errno, ENOBUFS); + + ret = NO_CTRL; + for (i = 0; i < cnt; i++) { + struct ext_ack ea = {}; + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + if (n < 0) { + ASSERT_GE(i, 10); + break; + } + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + ret = nl_get_extack(buf, n, &ea); + /* Once we fill the buffer we'll see one ENOBUFS followed + * by a number of EBUSYs. Then the last recv() will finally + * trigger and complete the dump. + */ + if (ret == FOUND_ERR && (ea.err == ENOBUFS || ea.err == EBUSY)) + continue; + EXPECT_EQ(ret, FOUND_EXTACK); + EXPECT_EQ(ea.err, EINVAL); + EXPECT_EQ(ea.attr_offs, + sizeof(struct nlmsghdr) + sizeof(struct ndmsg)); + } + /* Make sure last message was a full DONE+extack */ + EXPECT_EQ(ret, FOUND_EXTACK); +} + +static const struct { + struct nlmsghdr nlhdr; + struct genlmsghdr genlhdr; + struct nlattr ahdr; + __u16 val; + __u16 pad; +} dump_policies = { + .nlhdr = { + .nlmsg_len = sizeof(dump_policies), + .nlmsg_type = GENL_ID_CTRL, + .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP, + .nlmsg_seq = 1, + }, + .genlhdr = { + .cmd = CTRL_CMD_GETPOLICY, + .version = 2, + }, + .ahdr = { + .nla_len = 6, + .nla_type = CTRL_ATTR_FAMILY_ID, + }, + .val = GENL_ID_CTRL, + .pad = 0, +}; + +// Sanity check for the test itself, make sure the dump doesn't fit in one msg +TEST(test_sanity) +{ + int netlink_sock; + char buf[8192]; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + n = recv(netlink_sock, buf, sizeof(buf), MSG_DONTWAIT); + ASSERT_GE(n, (ssize_t)sizeof(struct nlmsghdr)); + + 
close(netlink_sock); +} + +TEST(close_in_progress) +{ + int netlink_sock; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + close(netlink_sock); +} + +TEST(close_with_ref) +{ + char cookie[NOTIFY_COOKIE_LEN] = {}; + int netlink_sock, mq_fd; + struct sigevent sigev; + ssize_t n; + + netlink_sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + ASSERT_GE(netlink_sock, 0); + + n = send(netlink_sock, &dump_policies, sizeof(dump_policies), 0); + ASSERT_EQ(n, sizeof(dump_policies)); + + mq_fd = syscall(__NR_mq_open, "sed", O_CREAT | O_WRONLY, 0600, 0); + ASSERT_GE(mq_fd, 0); + + memset(&sigev, 0, sizeof(sigev)); + sigev.sigev_notify = SIGEV_THREAD; + sigev.sigev_value.sival_ptr = cookie; + sigev.sigev_signo = netlink_sock; + + syscall(__NR_mq_notify, mq_fd, &sigev); + + close(netlink_sock); + + // give mqueue time to fire + usleep(100 * 1000); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh index 6974474c26f3..38871bdef67f 100755 --- a/tools/testing/selftests/net/netns-name.sh +++ b/tools/testing/selftests/net/netns-name.sh @@ -7,10 +7,12 @@ set -o pipefail DEV=dummy-dev0 DEV2=dummy-dev1 ALT_NAME=some-alt-name +NSIM_ADDR=2025 RET_CODE=0 cleanup() { + cleanup_netdevsim $NSIM_ADDR cleanup_ns $NS $test_ns } @@ -25,12 +27,15 @@ setup_ns NS test_ns # # Test basic move without a rename +# Use netdevsim because it has extra asserts for notifiers. 
# -ip -netns $NS link add name $DEV type dummy || fail -ip -netns $NS link set dev $DEV netns $test_ns || + +nsim=$(create_netdevsim $NSIM_ADDR $NS) +ip -netns $NS link set dev $nsim netns $test_ns || fail "Can't perform a netns move" -ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move" -ip -netns $test_ns link del $DEV || fail +ip -netns $test_ns link show dev $nsim >> /dev/null || + fail "Device not found after move" +cleanup_netdevsim $NSIM_ADDR # # Test move with a conflict @@ -78,6 +83,16 @@ ip -netns $NS link show dev $ALT_NAME 2> /dev/null && fail "Can still find alt-name after move" ip -netns $test_ns link del $DEV || fail +# +# Test no conflict of the same name/ifindex in different netns +# +ip -netns $NS link add name $DEV index 100 type dummy || fail +ip -netns $NS link add netns $test_ns name $DEV index 100 type dummy || + fail "Can create in netns without moving" +ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found" +ip -netns $NS link del $DEV || fail +ip -netns $test_ns link del $DEV || fail + echo -ne "$(basename $0) \t\t\t\t" if [ $RET_CODE -eq 0 ]; then echo "[ OK ]" diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index cd8a58097448..1f5227f3d64d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -385,7 +385,7 @@ static int get_bind_to_device(int sd, char *name, size_t len) name[0] = '\0'; rc = getsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, name, &optlen); if (rc < 0) - log_err_errno("setsockopt(SO_BINDTODEVICE)"); + log_err_errno("getsockopt(SO_BINDTODEVICE)"); return rc; } @@ -535,7 +535,7 @@ static int set_freebind(int sd, int version) break; case AF_INET6: if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) { - log_err_errno("setsockopt(IPV6_FREEBIND"); + log_err_errno("setsockopt(IPV6_FREEBIND)"); rc = -1; } break; @@ -812,7 +812,7 @@ static int convert_addr(struct sock_args *args, 
const char *_str, sep++; if (str_to_uint(sep, 1, pfx_len_max, &args->prefix_len) != 0) { - fprintf(stderr, "Invalid port\n"); + fprintf(stderr, "Invalid prefix length\n"); return 1; } } else { @@ -1272,7 +1272,7 @@ static int msg_loop(int client, int sd, void *addr, socklen_t alen, } } - nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; + nfds = interactive ? MAX(fileno(stdin), sd) + 1 : sd + 1; while (1) { FD_ZERO(&rfds); FD_SET(sd, &rfds); @@ -1492,7 +1492,7 @@ static int lsock_init(struct sock_args *args) sd = socket(args->version, args->type, args->protocol); if (sd < 0) { log_err_errno("Error opening socket"); - return -1; + return -1; } if (set_reuseaddr(sd) != 0) @@ -1912,7 +1912,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args) * waiting to be told when to continue */ if (read(fd, &buf, sizeof(buf)) <= 0) { - log_err_errno("Failed to read IPC status from status"); + log_err_errno("Failed to read IPC status from pipe"); return 1; } if (!buf) { diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 93d9d914529b..5c66421ab8aa 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -2,8 +2,9 @@ # SPDX-License-Identifier: GPL-2.0 import time +from os import system from lib.py import ksft_run, ksft_exit, ksft_pr -from lib.py import ksft_eq, ksft_ge, ksft_busy_wait +from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_busy_wait from lib.py import NetdevFamily, NetdevSimDev, ip @@ -18,6 +19,160 @@ def lo_check(nf) -> None: ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0) +def napi_list_check(nf) -> None: + with NetdevSimDev(queue_count=100) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 100) + + for q in [50, 0, 99]: + for i in range(4): + nsim.dfs_write("queue_reset", f"{q} {i}") + napis = nf.napi_get({'ifindex': nsim.ifindex}, 
dump=True) + ksft_eq(len(napis), 100, + comment=f"queue count after reset queue {q} mode {i}") + +def napi_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at napi and device level. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "enabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + ip(f"link set dev {nsim.ifname} down") + ip(f"link set dev {nsim.ifname} up") + + # verify if napi threaded is still set + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # check it is still not set for napi1 + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # unset napi threaded and verify + nf.napi_set({'id': napi0_id, 'threaded': "disabled"}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + + # set threaded at device level + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = 
nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + + # set napi threaded for napi0 + nf.napi_set({'id': napi0_id, 'threaded': 1}) + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + + # unset threaded at device level + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + +def dev_set_threaded(nf) -> None: + """ + Test that verifies various cases of napi threaded + set and unset at device level using sysfs. + """ + with NetdevSimDev(queue_count=2) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + + napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True) + ksft_eq(len(napis), 2) + + napi0_id = napis[0]['id'] + napi1_id = napis[1]['id'] + + # set threaded + system(f"echo 1 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is set for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "enabled") + ksft_ne(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "enabled") + ksft_ne(napi1.get('pid'), None) + + # unset threaded + system(f"echo 0 > /sys/class/net/{nsim.ifname}/threaded") + + # check napi threaded is unset for both napis + napi0 = nf.napi_get({'id': napi0_id}) + ksft_eq(napi0['threaded'], "disabled") + ksft_eq(napi0.get('pid'), None) + napi1 = nf.napi_get({'id': napi1_id}) + ksft_eq(napi1['threaded'], "disabled") + ksft_eq(napi1.get('pid'), None) + +def nsim_rxq_reset_down(nf) -> None: + """ + Test that the queue API supports 
resetting a queue + while the interface is down. We should convert this + test to testing real HW once more devices support + queue API. + """ + with NetdevSimDev(queue_count=4) as nsimdev: + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} down") + for i in [0, 2, 3]: + nsim.dfs_write("queue_reset", f"1 {i}") + + def page_pool_check(nf) -> None: with NetdevSimDev() as nsimdev: nsim = nsimdev.nsims[0] @@ -89,7 +244,8 @@ def page_pool_check(nf) -> None: def main() -> None: nf = NetdevFamily() - ksft_run([empty_check, lo_check, page_pool_check], + ksft_run([empty_check, lo_check, page_pool_check, napi_list_check, + dev_set_threaded, napi_set_threaded, nsim_rxq_reset_down], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile index 2f1508abc826..3fd1da2ec07d 100644 --- a/tools/testing/selftests/net/openvswitch/Makefile +++ b/tools/testing/selftests/net/openvswitch/Makefile @@ -2,7 +2,7 @@ top_srcdir = ../../../../.. 
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) +CFLAGS += -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES) TEST_PROGS := openvswitch.sh diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index cc0bfae2bafa..b327d3061ed5 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -25,6 +25,7 @@ tests=" nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces + tunnel_metadata ovs: test extraction of tunnel metadata drop_reason drop: test drop reasons are emitted psample psample: Sampling packets with psample" @@ -113,13 +114,13 @@ ovs_add_dp () { } ovs_add_if () { - info "Adding IF to DP: br:$2 if:$3" - if [ "$4" != "-u" ]; then - ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \ - || return 1 + info "Adding IF to DP: br:$3 if:$4 ($2)" + if [ "$5" != "-u" ]; then + ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if \ + -t "$2" "$3" "$4" || return 1 else python3 $ovs_base/ovs-dpctl.py add-if \ - -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err & + -u -t "$2" "$3" "$4" >$ovs_dir/$4.out 2>$ovs_dir/$4.err & pid=$! 
on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null" fi @@ -166,13 +167,15 @@ ovs_add_netns_and_veths () { fi if [ "$7" != "-u" ]; then - ovs_add_if "$1" "$2" "$4" || return 1 + ovs_add_if "$1" "netdev" "$2" "$4" || return 1 else - ovs_add_if "$1" "$2" "$4" -u || return 1 + ovs_add_if "$1" "netdev" "$2" "$4" -u || return 1 fi - [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \ - tcpdump -i any -s 65535 + if [ $TRACING -eq 1 ]; then + ovs_netns_spawn_daemon "$1" "$3" tcpdump -l -i any -s 6553 + ovs_wait grep -q "listening on any" ${ovs_dir}/stderr + fi return 0 } @@ -328,6 +331,11 @@ test_psample() { # - drop packets and verify the right drop reason is reported test_drop_reason() { which perf >/dev/null 2>&1 || return $ksft_skip + which pahole >/dev/null 2>&1 || return $ksft_skip + + ovs_drop_subsys=$(pahole -C skb_drop_reason_subsys | + awk '/OPENVSWITCH/ { print $3; }' | + tr -d ,) sbx_add "test_drop_reason" || return $? @@ -371,7 +379,7 @@ test_drop_reason() { "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop' ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20 - ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0001 # OVS_DROP_FLOW_ACTION if [[ "$?" -ne "2" ]]; then info "Did not detect expected drops: $?" return 1 @@ -388,7 +396,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000 - ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR + ovs_drop_reason_count 0x${ovs_drop_subsys}0004 # OVS_DROP_EXPLICIT_ACTION_ERROR if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit error drops: $?" 
return 1 @@ -396,7 +404,7 @@ test_drop_reason() { ovs_drop_record_and_run \ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000 - ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION + ovs_drop_reason_count 0x${ovs_drop_subsys}0003 # OVS_DROP_EXPLICIT_ACTION if [[ "$?" -ne "1" ]]; then info "Did not detect expected explicit drops: $?" return 1 @@ -749,6 +757,79 @@ test_upcall_interfaces() { return 0 } +ovs_add_kernel_tunnel() { + local sbxname=$1; shift + local ns=$1; shift + local tnl_type=$1; shift + local name=$1; shift + local addr=$1; shift + + info "setting up kernel ${tnl_type} tunnel ${name}" + ovs_sbx "${sbxname}" ip -netns ${ns} link add dev ${name} type ${tnl_type} $* || return 1 + on_exit "ovs_sbx ${sbxname} ip -netns ${ns} link del ${name} >/dev/null 2>&1" + ovs_sbx "${sbxname}" ip -netns ${ns} addr add dev ${name} ${addr} || return 1 + ovs_sbx "${sbxname}" ip -netns ${ns} link set dev ${name} mtu 1450 up || return 1 +} + +test_tunnel_metadata() { + which arping >/dev/null 2>&1 || return $ksft_skip + + sbxname="test_tunnel_metadata" + sbx_add "${sbxname}" || return 1 + + info "setting up new DP" + ovs_add_dp "${sbxname}" tdp0 -V 2:1 || return 1 + + ovs_add_netns_and_veths "${sbxname}" tdp0 tns left0 l0 \ + 172.31.110.1/24 || return 1 + + info "removing veth interface from openvswitch and setting IP" + ovs_del_if "${sbxname}" tdp0 left0 || return 1 + ovs_sbx "${sbxname}" ip addr add 172.31.110.2/24 dev left0 || return 1 + ovs_sbx "${sbxname}" ip link set left0 up || return 1 + + info "setting up tunnel port in openvswitch" + ovs_add_if "${sbxname}" "vxlan" tdp0 ovs-vxlan0 -u || return 1 + on_exit "ovs_sbx ${sbxname} ip link del ovs-vxlan0" + ovs_wait ip link show ovs-vxlan0 &>/dev/null || return 1 + ovs_sbx "${sbxname}" ip link set ovs-vxlan0 up || return 1 + + configs=$(echo ' + 1 172.31.221.1/24 1155332 32 set udpcsum flags\(df\|csum\) + 2 172.31.222.1/24 1234567 45 set noudpcsum flags\(df\) + 3 172.31.223.1/24 1020304 23 unset 
udpcsum flags\(csum\) + 4 172.31.224.1/24 1357986 15 unset noudpcsum' | sed '/^$/d') + + while read -r i addr id ttl df csum flags; do + ovs_add_kernel_tunnel "${sbxname}" tns vxlan vxlan${i} ${addr} \ + remote 172.31.110.2 id ${id} dstport 4789 \ + ttl ${ttl} df ${df} ${csum} || return 1 + done <<< "${configs}" + + ovs_wait grep -q 'listening on upcall packet handler' \ + ${ovs_dir}/ovs-vxlan0.out || return 1 + + info "sending arping" + for i in 1 2 3 4; do + ovs_sbx "${sbxname}" ip netns exec tns \ + arping -I vxlan${i} 172.31.22${i}.2 -c 1 \ + >${ovs_dir}/arping.stdout 2>${ovs_dir}/arping.stderr + done + + info "checking that received decapsulated packets carry correct metadata" + while read -r i addr id ttl df csum flags; do + arp_hdr="arp\\(sip=172.31.22${i}.1,tip=172.31.22${i}.2,op=1,sha=" + addrs="src=172.31.110.1,dst=172.31.110.2" + ports="tp_src=[0-9]*,tp_dst=4789" + tnl_md="tunnel\\(tun_id=${id},${addrs},ttl=${ttl},${ports},${flags}\\)" + + ovs_sbx "${sbxname}" grep -qE "MISS upcall.*${tnl_md}.*${arp_hdr}" \ + ${ovs_dir}/ovs-vxlan0.out || return 1 + done <<< "${configs}" + + return 0 +} + run_test() { ( tname="$1" diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index 8a0396bfaf99..b521e0dea506 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -1877,7 +1877,7 @@ class OvsPacket(GenericNetlinkSocket): elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE: up.execute(msg) else: - print("Unkonwn cmd: %d" % msg["cmd"]) + print("Unknown cmd: %d" % msg["cmd"]) except NetlinkError as ne: raise ne diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore new file mode 100644 index 000000000000..ee44c081ca7c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0+ +ovpn-cli diff --git 
a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile new file mode 100644 index 000000000000..dbe0388c8512 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/Makefile @@ -0,0 +1,34 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES) +VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_CFLAGS),) +VAR_CFLAGS = -I/usr/include/libnl3 +endif +CFLAGS += $(VAR_CFLAGS) + + +LDLIBS = -lmbedtls -lmbedcrypto +VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null) +ifeq ($(VAR_LDLIBS),) +VAR_LDLIBS = -lnl-genl-3 -lnl-3 +endif +LDLIBS += $(VAR_LDLIBS) + + +TEST_FILES = common.sh + +TEST_PROGS := \ + test-chachapoly.sh \ + test-close-socket-tcp.sh \ + test-close-socket.sh \ + test-float.sh \ + test-large-mtu.sh \ + test-tcp.sh \ + test.sh \ +# end of TEST_PROGS + +TEST_GEN_FILES := ovpn-cli + +include ../../lib.mk diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh new file mode 100644 index 000000000000..88869c675d03 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/common.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. 
+# +# Author: Antonio Quartulli <antonio@openvpn.net> + +UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt} +TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt} +OVPN_CLI=${OVPN_CLI:-./ovpn-cli} +ALG=${ALG:-aes} +PROTO=${PROTO:-UDP} +FLOAT=${FLOAT:-0} + +LAN_IP="11.11.11.11" + +create_ns() { + ip netns add peer${1} +} + +setup_ns() { + MODE="P2P" + + if [ ${1} -eq 0 ]; then + MODE="MP" + for p in $(seq 1 ${NUM_PEERS}); do + ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p} + + ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p} + ip -n peer0 addr add fd00:0:0:${p}::1/64 dev veth${p} + ip -n peer0 link set veth${p} up + + ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add fd00:0:0:${p}::2/64 dev veth${p} + ip -n peer${p} link set veth${p} up + done + fi + + ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE + ip -n peer${1} addr add ${2} dev tun${1} + # add a secondary IP to peer 1, to test a LAN behind a client + if [ ${1} -eq 1 -a -n "${LAN_IP}" ]; then + ip -n peer${1} addr add ${LAN_IP} dev tun${1} + ip -n peer0 route add ${LAN_IP} via $(echo ${2} |sed -e s'!/.*!!') dev tun0 + fi + if [ -n "${3}" ]; then + ip -n peer${1} link set mtu ${3} dev tun${1} + fi + ip -n peer${1} link set tun${1} up +} + +add_peer() { + if [ "${PROTO}" == "UDP" ]; then + if [ ${1} -eq 0 ]; then + ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE} + + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \ + data64.key + done + else + RADDR=$(awk "NR == ${1} {print \$2}" ${UDP_PEERS_FILE}) + RPORT=$(awk "NR == ${1} {print \$3}" ${UDP_PEERS_FILE}) + LPORT=$(awk "NR == ${1} {print \$5}" ${UDP_PEERS_FILE}) + ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} ${LPORT} \ + ${RADDR} ${RPORT} + ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \ + data64.key + fi + else + if [ ${1} -eq 0 ]; then + (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 
${TCP_PEERS_FILE} && { + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \ + ${ALG} 0 data64.key + done + }) & + sleep 5 + else + ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \ + data64.key + fi + fi +} + +cleanup() { + # some ovpn-cli processes sleep in background so they need manual poking + killall $(basename ${OVPN_CLI}) 2>/dev/null || true + + # netns peer0 is deleted without erasing ifaces first + for p in $(seq 1 10); do + ip -n peer${p} link set tun${p} down 2>/dev/null || true + ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true + done + for p in $(seq 1 10); do + ip -n peer0 link del veth${p} 2>/dev/null || true + done + for p in $(seq 0 10); do + ip netns del peer${p} 2>/dev/null || true + done +} + +if [ "${PROTO}" == "UDP" ]; then + NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')} +else + NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')} +fi + + diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config new file mode 100644 index 000000000000..42699740936d --- /dev/null +++ b/tools/testing/selftests/net/ovpn/config @@ -0,0 +1,10 @@ +CONFIG_CRYPTO=y +CONFIG_CRYPTO_AES=y +CONFIG_CRYPTO_CHACHA20POLY1305=y +CONFIG_CRYPTO_GCM=y +CONFIG_DST_CACHE=y +CONFIG_INET=y +CONFIG_NET=y +CONFIG_NET_UDP_TUNNEL=y +CONFIG_OVPN=m +CONFIG_STREAM_PARSER=y diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key new file mode 100644 index 000000000000..a99e88c4e290 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/data64.key @@ -0,0 +1,5 @@ +jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B +ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9 +uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6 +KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE 
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w== diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c new file mode 100644 index 000000000000..0f3babf19fd0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c @@ -0,0 +1,2387 @@ +// SPDX-License-Identifier: GPL-2.0 +/* OpenVPN data channel accelerator + * + * Copyright (C) 2020-2025 OpenVPN, Inc. + * + * Author: Antonio Quartulli <antonio@openvpn.net> + */ + +#include <stdio.h> +#include <inttypes.h> +#include <stdbool.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <net/if.h> +#include <netinet/in.h> +#include <time.h> + +#include <linux/ovpn.h> +#include <linux/types.h> +#include <linux/netlink.h> + +#include <netlink/socket.h> +#include <netlink/netlink.h> +#include <netlink/genl/genl.h> +#include <netlink/genl/family.h> +#include <netlink/genl/ctrl.h> + +#include <mbedtls/base64.h> +#include <mbedtls/error.h> + +#include <sys/socket.h> + +#include "kselftest.h" + +/* defines to make checkpatch happy */ +#define strscpy strncpy + +/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we + * have to explicitly do it to prevent the kernel from failing upon + * parsing of the message + */ +#define nla_nest_start(_msg, _type) \ + nla_nest_start(_msg, (_type) | NLA_F_NESTED) + +/* libnl < 3.11.0 does not implement nla_get_uint() */ +uint64_t ovpn_nla_get_uint(struct nlattr *attr) +{ + if (nla_len(attr) == sizeof(uint32_t)) + return nla_get_u32(attr); + else + return nla_get_u64(attr); +} + +typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg); + +enum ovpn_key_direction { + KEY_DIR_IN = 0, + KEY_DIR_OUT, +}; + +#define KEY_LEN (256 / 8) +#define NONCE_LEN 8 + +#define PEER_ID_UNDEF 0x00FFFFFF +#define MAX_PEERS 10 + +struct nl_ctx { + struct nl_sock *nl_sock; + struct nl_msg *nl_msg; + struct nl_cb *nl_cb; + + int ovpn_dco_id; +}; + +enum ovpn_cmd { + CMD_INVALID, + CMD_NEW_IFACE, + 
CMD_DEL_IFACE, + CMD_LISTEN, + CMD_CONNECT, + CMD_NEW_PEER, + CMD_NEW_MULTI_PEER, + CMD_SET_PEER, + CMD_DEL_PEER, + CMD_GET_PEER, + CMD_NEW_KEY, + CMD_DEL_KEY, + CMD_GET_KEY, + CMD_SWAP_KEYS, + CMD_LISTEN_MCAST, +}; + +struct ovpn_ctx { + enum ovpn_cmd cmd; + + __u8 key_enc[KEY_LEN]; + __u8 key_dec[KEY_LEN]; + __u8 nonce[NONCE_LEN]; + + enum ovpn_cipher_alg cipher; + + sa_family_t sa_family; + + unsigned long peer_id; + unsigned long lport; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } remote; + + union { + struct sockaddr_in in4; + struct sockaddr_in6 in6; + } peer_ip; + + bool peer_ip_set; + + unsigned int ifindex; + char ifname[IFNAMSIZ]; + enum ovpn_mode mode; + bool mode_set; + + int socket; + int cli_sockets[MAX_PEERS]; + + __u32 keepalive_interval; + __u32 keepalive_timeout; + + enum ovpn_key_direction key_dir; + enum ovpn_key_slot key_slot; + int key_id; + + const char *peers_file; +}; + +static int ovpn_nl_recvmsgs(struct nl_ctx *ctx) +{ + int ret; + + ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb); + + switch (ret) { + case -NLE_INTR: + fprintf(stderr, + "netlink received interrupt due to signal - ignoring\n"); + break; + case -NLE_NOMEM: + fprintf(stderr, "netlink out of memory error\n"); + break; + case -NLE_AGAIN: + fprintf(stderr, + "netlink reports blocking read - aborting wait\n"); + break; + default: + if (ret) + fprintf(stderr, "netlink reports error (%d): %s\n", + ret, nl_geterror(-ret)); + break; + } + + return ret; +} + +static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd, + int flags) +{ + struct nl_ctx *ctx; + int err, ret; + + ctx = calloc(1, sizeof(*ctx)); + if (!ctx) + return NULL; + + ctx->nl_sock = nl_socket_alloc(); + if (!ctx->nl_sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + goto err_free; + } + + nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192); + + ret = genl_connect(ctx->nl_sock); + if (ret) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + 
nl_geterror(ret)); + goto err_sock; + } + + /* enable Extended ACK for detailed error reporting */ + err = 1; + setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK, + &err, sizeof(err)); + + ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME); + if (ctx->ovpn_dco_id < 0) { + fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n", + ctx->ovpn_dco_id); + goto err_free; + } + + ctx->nl_msg = nlmsg_alloc(); + if (!ctx->nl_msg) { + fprintf(stderr, "cannot allocate netlink message\n"); + goto err_sock; + } + + ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!ctx->nl_cb) { + fprintf(stderr, "failed to allocate netlink callback\n"); + goto err_msg; + } + + nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb); + + genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0); + + if (ovpn->ifindex > 0) + NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex); + + return ctx; +nla_put_failure: +err_msg: + nlmsg_free(ctx->nl_msg); +err_sock: + nl_socket_free(ctx->nl_sock); +err_free: + free(ctx); + return NULL; +} + +static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd) +{ + return nl_ctx_alloc_flags(ovpn, cmd, 0); +} + +static void nl_ctx_free(struct nl_ctx *ctx) +{ + if (!ctx) + return; + + nl_socket_free(ctx->nl_sock); + nlmsg_free(ctx->nl_msg); + nl_cb_put(ctx->nl_cb); + free(ctx); +} + +static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1; + struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1]; + int len = nlh->nlmsg_len; + struct nlattr *attrs; + int *ret = arg; + int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh); + + *ret = err->error; + + if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS)) + return NL_STOP; + + if (!(nlh->nlmsg_flags & NLM_F_CAPPED)) + ack_len += err->msg.nlmsg_len - sizeof(*nlh); + + if (len <= ack_len) + return NL_STOP; + + attrs = (void *)((uint8_t *)nlh + ack_len); + len -= ack_len; + + 
nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL); + if (tb_msg[NLMSGERR_ATTR_MSG]) { + len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]), + nla_len(tb_msg[NLMSGERR_ATTR_MSG])); + fprintf(stderr, "kernel error: %*s\n", len, + (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) { + fprintf(stderr, "missing required nesting type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST])); + } + + if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) { + fprintf(stderr, "missing required attribute type %u\n", + nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE])); + } + + return NL_STOP; +} + +static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_SKIP; +} + +static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused, + void *arg) +{ + int *status = arg; + + *status = 0; + return NL_STOP; +} + +static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb) +{ + int status = 1; + + nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status); + nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish, + &status); + nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status); + + if (cb) + nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx); + + nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg); + + while (status == 1) + ovpn_nl_recvmsgs(ctx); + + if (status < 0) + fprintf(stderr, "failed to send netlink message: %s (%d)\n", + strerror(-status), status); + + return status; +} + +static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx) +{ + int idx_enc, idx_dec, ret = -1; + unsigned char *ckey = NULL; + __u8 *bkey = NULL; + size_t olen = 0; + long ckey_len; + FILE *fp; + + fp = fopen(file, "r"); + if (!fp) { + fprintf(stderr, "cannot open: %s\n", file); + return -1; + } + + /* get file size */ + fseek(fp, 0L, SEEK_END); + ckey_len = ftell(fp); + rewind(fp); + + /* if the file is longer, let's just read a portion */ + if (ckey_len > 
256) + ckey_len = 256; + + ckey = malloc(ckey_len); + if (!ckey) + goto err; + + ret = fread(ckey, 1, ckey_len, fp); + if (ret != ckey_len) { + fprintf(stderr, + "couldn't read enough data from key file: %dbytes read\n", + ret); + goto err; + } + + olen = 0; + ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len); + if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf, + ret); + + goto err; + } + + bkey = malloc(olen); + if (!bkey) { + fprintf(stderr, "cannot allocate binary key buffer\n"); + goto err; + } + + ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len); + if (ret) { + char buf[256]; + + mbedtls_strerror(ret, buf, sizeof(buf)); + fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf, + ret); + + goto err; + } + + if (olen < 2 * KEY_LEN + NONCE_LEN) { + fprintf(stderr, + "not enough data in key file, found %zdB but needs %dB\n", + olen, 2 * KEY_LEN + NONCE_LEN); + goto err; + } + + switch (ctx->key_dir) { + case KEY_DIR_IN: + idx_enc = 0; + idx_dec = 1; + break; + case KEY_DIR_OUT: + idx_enc = 1; + idx_dec = 0; + break; + default: + goto err; + } + + memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN); + memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN); + memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN); + + ret = 0; + +err: + fclose(fp); + free(bkey); + free(ckey); + + return ret; +} + +static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx) +{ + if (strcmp(cipher, "aes") == 0) + ctx->cipher = OVPN_CIPHER_ALG_AES_GCM; + else if (strcmp(cipher, "chachapoly") == 0) + ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305; + else if (strcmp(cipher, "none") == 0) + ctx->cipher = OVPN_CIPHER_ALG_NONE; + else + return -ENOTSUP; + + return 0; +} + +static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx) +{ + int in_dir; + + in_dir = strtoll(dir, NULL, 10); + switch (in_dir) { + case 
KEY_DIR_IN: + case KEY_DIR_OUT: + ctx->key_dir = in_dir; + break; + default: + fprintf(stderr, + "invalid key direction provided. Can be 0 or 1 only\n"); + return -1; + } + + return 0; +} + +static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto) +{ + struct sockaddr_storage local_sock = { 0 }; + struct sockaddr_in6 *in6; + struct sockaddr_in *in; + int ret, s, sock_type; + size_t sock_len; + + if (proto == IPPROTO_UDP) + sock_type = SOCK_DGRAM; + else if (proto == IPPROTO_TCP) + sock_type = SOCK_STREAM; + else + return -EINVAL; + + s = socket(family, sock_type, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (family) { + case AF_INET: + in = (struct sockaddr_in *)&local_sock; + in->sin_family = family; + in->sin_port = htons(ctx->lport); + in->sin_addr.s_addr = htonl(INADDR_ANY); + sock_len = sizeof(*in); + break; + case AF_INET6: + in6 = (struct sockaddr_in6 *)&local_sock; + in6->sin6_family = family; + in6->sin6_port = htons(ctx->lport); + in6->sin6_addr = in6addr_any; + sock_len = sizeof(*in6); + break; + default: + /* unsupported family: the socket is already open, do not leak it */ + close(s); + return -1; + } + + int opt = 1; + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)); + + if (ret < 0) { + perror("setsockopt for SO_REUSEADDR"); + /* every failure past socket() goes through err_socket so that s is closed */ + goto err_socket; + } + + ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)); + if (ret < 0) { + perror("setsockopt for SO_REUSEPORT"); + goto err_socket; + } + + if (family == AF_INET6) { + opt = 0; + if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt, + sizeof(opt))) { + perror("failed to set IPV6_V6ONLY"); + goto err_socket; + } + } + + ret = bind(s, (struct sockaddr *)&local_sock, sock_len); + if (ret < 0) { + perror("cannot bind socket"); + goto err_socket; + } + + ctx->socket = s; + ctx->sa_family = family; + return 0; + +err_socket: + close(s); + return -1; +} + +static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family) +{ + return ovpn_socket(ctx, family, IPPROTO_UDP); +} + +static int ovpn_listen(struct ovpn_ctx *ctx, 
sa_family_t family) +{ + int ret; + + ret = ovpn_socket(ctx, family, IPPROTO_TCP); + if (ret < 0) + return ret; + + ret = listen(ctx->socket, 10); + if (ret < 0) { + perror("listen"); + close(ctx->socket); + return -1; + } + + return 0; +} + +static int ovpn_accept(struct ovpn_ctx *ctx) +{ + socklen_t socklen; + int ret; + + socklen = sizeof(ctx->remote); + ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen); + if (ret < 0) { + perror("accept"); + goto err; + } + + fprintf(stderr, "Connection received!\n"); + + switch (socklen) { + case sizeof(struct sockaddr_in): + case sizeof(struct sockaddr_in6): + break; + default: + fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n"); + close(ret); + ret = -EINVAL; + goto err; + } + + return ret; +err: + close(ctx->socket); + return ret; +} + +static int ovpn_connect(struct ovpn_ctx *ovpn) +{ + socklen_t socklen; + int s, ret; + + s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0); + if (s < 0) { + perror("cannot create socket"); + return -1; + } + + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + socklen = sizeof(struct sockaddr_in); + break; + case AF_INET6: + socklen = sizeof(struct sockaddr_in6); + break; + default: + return -EOPNOTSUPP; + } + + ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen); + if (ret < 0) { + perror("connect"); + goto err; + } + + fprintf(stderr, "connected\n"); + + ovpn->socket = s; + + return 0; +err: + close(s); + return ret; +} + +static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket); + + if (!is_tcp) { + switch (ovpn->remote.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4, + 
ovpn->remote.in4.sin_addr.s_addr); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in4.sin_port); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6, + sizeof(ovpn->remote.in6.sin6_addr), + &ovpn->remote.in6.sin6_addr); + NLA_PUT_U32(ctx->nl_msg, + OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID, + ovpn->remote.in6.sin6_scope_id); + NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT, + ovpn->remote.in6.sin6_port); + break; + default: + fprintf(stderr, + "Invalid family for remote socket address\n"); + goto nla_put_failure; + } + } + + if (ovpn->peer_ip_set) { + switch (ovpn->peer_ip.in4.sin_family) { + case AF_INET: + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4, + ovpn->peer_ip.in4.sin_addr.s_addr); + break; + case AF_INET6: + NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6, + sizeof(struct in6_addr), + &ovpn->peer_ip.in6.sin6_addr); + break; + default: + fprintf(stderr, "Invalid family for peer address\n"); + goto nla_put_failure; + } + } + + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_set_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL, + ovpn->keepalive_interval); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT, + ovpn->keepalive_timeout); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_del_peer(struct ovpn_ctx *ovpn) +{ + struct nlattr *attr; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL); + if (!ctx) + return -ENOMEM; + + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, 
OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *pattrs[OVPN_A_PEER_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + __u16 rport = 0, lport = 0; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_PEER]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]), + nla_len(attrs[OVPN_A_PEER]), NULL); + + if (pattrs[OVPN_A_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(pattrs[OVPN_A_PEER_ID])); + + if (pattrs[OVPN_A_PEER_SOCKET_NETNSID]) + fprintf(stderr, "\tsocket NetNS ID: %d\n", + nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID])); + + if (pattrs[OVPN_A_PEER_VPN_IPV4]) { + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv4: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_VPN_IPV6]) { + char buf[INET6_ADDRSTRLEN]; + + inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]), + buf, sizeof(buf)); + fprintf(stderr, "\tVPN IPv6: %s\n", buf); + } + + if (pattrs[OVPN_A_PEER_LOCAL_PORT]) + lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_PORT]) + rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT])); + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6]; + char buf[INET6_ADDRSTRLEN]; + int scope_id = -1; + + if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) { + void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]; + + scope_id = nla_get_u32(p); + } + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport, + 
scope_id); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) { + void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6]; + + inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) { + void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4]; + char buf[INET_ADDRSTRLEN]; + + inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf)); + fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport); + + if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) { + void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4]; + + inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf)); + fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport); + } + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) { + void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]; + + fprintf(stderr, "\tKeepalive interval: %u sec\n", + nla_get_u32(p)); + } + + if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]) + fprintf(stderr, "\tKeepalive timeout: %u sec\n", + nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])); + + if (pattrs[OVPN_A_PEER_VPN_RX_BYTES]) + fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_TX_BYTES]) + fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS]) + fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS]) + fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_RX_BYTES]) + fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_TX_BYTES]) + fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES])); + + if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS]) + fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n", + 
ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS])); + + if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS]) + fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n", + ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS])); + + return NL_SKIP; +} + +static int ovpn_get_peer(struct ovpn_ctx *ovpn) +{ + int flags = 0, ret = -1; + struct nlattr *attr; + struct nl_ctx *ctx; + + if (ovpn->peer_id == PEER_ID_UNDEF) + flags = NLM_F_DUMP; + + ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags); + if (!ctx) + return -ENOMEM; + + if (ovpn->peer_id != PEER_ID_UNDEF) { + attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, attr); + } + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_new_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf, *key_dir; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec); + NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce); + nla_nest_end(ctx->nl_msg, key_dir); + + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + 
+static int ovpn_del_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused) +{ + struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + + nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!attrs[OVPN_A_KEYCONF]) { + fprintf(stderr, "no packet content in netlink message\n"); + return NL_SKIP; + } + + nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]), + nla_len(attrs[OVPN_A_KEYCONF]), NULL); + + if (kattrs[OVPN_A_KEYCONF_PEER_ID]) + fprintf(stderr, "* Peer %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID])); + if (kattrs[OVPN_A_KEYCONF_SLOT]) { + fprintf(stderr, "\t- Slot: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) { + case OVPN_KEY_SLOT_PRIMARY: + fprintf(stderr, "primary\n"); + break; + case OVPN_KEY_SLOT_SECONDARY: + fprintf(stderr, "secondary\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])); + break; + } + } + if (kattrs[OVPN_A_KEYCONF_KEY_ID]) + fprintf(stderr, "\t- Key ID: %u\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID])); + if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) { + fprintf(stderr, "\t- Cipher: "); + switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) { + case OVPN_CIPHER_ALG_NONE: + fprintf(stderr, "none\n"); + break; + case OVPN_CIPHER_ALG_AES_GCM: + fprintf(stderr, "aes-gcm\n"); + break; + case 
OVPN_CIPHER_ALG_CHACHA20_POLY1305: + fprintf(stderr, "chacha20poly1305\n"); + break; + default: + fprintf(stderr, "invalid (%u)\n", + nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])); + break; + } + } + + return NL_SKIP; +} + +static int ovpn_get_key(struct ovpn_ctx *ovpn) +{ + struct nlattr *keyconf; + struct nl_ctx *ctx; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET); + if (!ctx) + return -ENOMEM; + + keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot); + nla_nest_end(ctx->nl_msg, keyconf); + + ret = ovpn_nl_msg_send(ctx, ovpn_handle_key); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +static int ovpn_swap_keys(struct ovpn_ctx *ovpn) +{ + struct nl_ctx *ctx; + struct nlattr *kc; + int ret = -1; + + ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP); + if (!ctx) + return -ENOMEM; + + kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF); + NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id); + nla_nest_end(ctx->nl_msg, kc); + + ret = ovpn_nl_msg_send(ctx, NULL); +nla_put_failure: + nl_ctx_free(ctx); + return ret; +} + +/* Helper function used to easily add attributes to a rtnl message */ +static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type, + const void *data, int alen) +{ + int len = RTA_LENGTH(alen); + struct rtattr *rta; + + if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) { + fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n", + __func__, maxlen); + return -EMSGSIZE; + } + + rta = nlmsg_tail(n); + rta->rta_type = type; + rta->rta_len = len; + + if (!data) + memset(RTA_DATA(rta), 0, alen); + else + memcpy(RTA_DATA(rta), data, alen); + + n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len); + + return 0; +} + +static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size, + int attr) +{ + struct rtattr *nest = nlmsg_tail(msg); + + if 
(ovpn_addattr(msg, max_size, attr, NULL, 0) < 0) + return NULL; + + return nest; +} + +static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest) +{ + nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest; +} + +#define RT_SNDBUF_SIZE (1024 * 2) +#define RT_RCVBUF_SIZE (1024 * 4) + +/* Open RTNL socket */ +static int ovpn_rt_socket(void) +{ + int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd; + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (fd < 0) { + fprintf(stderr, "%s: cannot open netlink socket\n", __func__); + return fd; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, + sizeof(sndbuf)) < 0) { + fprintf(stderr, "%s: SO_SNDBUF\n", __func__); + close(fd); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf, + sizeof(rcvbuf)) < 0) { + fprintf(stderr, "%s: SO_RCVBUF\n", __func__); + close(fd); + return -1; + } + + return fd; +} + +/* Bind socket to Netlink subsystem */ +static int ovpn_rt_bind(int fd, uint32_t groups) +{ + struct sockaddr_nl local = { 0 }; + socklen_t addr_len; + + local.nl_family = AF_NETLINK; + local.nl_groups = groups; + + if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) { + fprintf(stderr, "%s: cannot bind netlink socket: %d\n", + __func__, errno); + return -errno; + } + + addr_len = sizeof(local); + if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) { + fprintf(stderr, "%s: cannot getsockname: %d\n", __func__, + errno); + return -errno; + } + + if (addr_len != sizeof(local)) { + fprintf(stderr, "%s: wrong address length %d\n", __func__, + addr_len); + return -EINVAL; + } + + if (local.nl_family != AF_NETLINK) { + fprintf(stderr, "%s: wrong address family %d\n", __func__, + local.nl_family); + return -EINVAL; + } + + return 0; +} + +typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg); + +/* Send Netlink message and run callback on reply (if specified) */ +static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer, + unsigned int groups, 
ovpn_parse_reply_cb cb, + void *arg_cb) +{ + int len, rem_len, fd, ret, rcv_len; + struct sockaddr_nl nladdr = { 0 }; + struct nlmsgerr *err; + struct nlmsghdr *h; + char buf[1024 * 16]; + struct iovec iov = { + .iov_base = payload, + .iov_len = payload->nlmsg_len, + }; + struct msghdr nlmsg = { + .msg_name = &nladdr, + .msg_namelen = sizeof(nladdr), + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + nladdr.nl_family = AF_NETLINK; + nladdr.nl_pid = peer; + nladdr.nl_groups = groups; + + payload->nlmsg_seq = time(NULL); + + /* no need to send reply */ + if (!cb) + payload->nlmsg_flags |= NLM_F_ACK; + + fd = ovpn_rt_socket(); + if (fd < 0) { + fprintf(stderr, "%s: can't open rtnl socket\n", __func__); + return -errno; + } + + ret = ovpn_rt_bind(fd, 0); + if (ret < 0) { + fprintf(stderr, "%s: can't bind rtnl socket\n", __func__); + ret = -errno; + goto out; + } + + ret = sendmsg(fd, &nlmsg, 0); + if (ret < 0) { + fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__); + ret = -errno; + goto out; + } + + /* prepare buffer to store RTNL replies */ + memset(buf, 0, sizeof(buf)); + iov.iov_base = buf; + + while (1) { + /* + * iov_len is modified by recvmsg(), therefore has to be initialized before + * using it again + */ + iov.iov_len = sizeof(buf); + rcv_len = recvmsg(fd, &nlmsg, 0); + if (rcv_len < 0) { + if (errno == EINTR || errno == EAGAIN) { + fprintf(stderr, "%s: interrupted call\n", + __func__); + continue; + } + fprintf(stderr, "%s: rtnl: error on recvmsg()\n", + __func__); + ret = -errno; + goto out; + } + + if (rcv_len == 0) { + fprintf(stderr, + "%s: rtnl: socket reached unexpected EOF\n", + __func__); + ret = -EIO; + goto out; + } + + if (nlmsg.msg_namelen != sizeof(nladdr)) { + fprintf(stderr, + "%s: sender address length: %u (expected %zu)\n", + __func__, nlmsg.msg_namelen, sizeof(nladdr)); + ret = -EIO; + goto out; + } + + h = (struct nlmsghdr *)buf; + while (rcv_len >= (int)sizeof(*h)) { + len = h->nlmsg_len; + rem_len = len - sizeof(*h); + + if (rem_len 
< 0 || len > rcv_len) { + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: truncated message\n", + __func__); + ret = -EIO; + goto out; + } + fprintf(stderr, "%s: malformed message: len=%d\n", + __func__, len); + ret = -EIO; + goto out; + } + + if (h->nlmsg_type == NLMSG_DONE) { + ret = 0; + goto out; + } + + if (h->nlmsg_type == NLMSG_ERROR) { + err = (struct nlmsgerr *)NLMSG_DATA(h); + if (rem_len < (int)sizeof(struct nlmsgerr)) { + fprintf(stderr, "%s: ERROR truncated\n", + __func__); + ret = -EIO; + goto out; + } + + if (err->error) { + fprintf(stderr, "%s: (%d) %s\n", + __func__, err->error, + strerror(-err->error)); + ret = err->error; + goto out; + } + + ret = 0; + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) + ret = r; + } + goto out; + } + + if (cb) { + int r = cb(h, arg_cb); + + if (r <= 0) { + ret = r; + goto out; + } + } else { + fprintf(stderr, "%s: RTNL: unexpected reply\n", + __func__); + } + + rcv_len -= NLMSG_ALIGN(len); + h = (struct nlmsghdr *)((uint8_t *)h + + NLMSG_ALIGN(len)); + } + + if (nlmsg.msg_flags & MSG_TRUNC) { + fprintf(stderr, "%s: message truncated\n", __func__); + continue; + } + + if (rcv_len) { + fprintf(stderr, "%s: rtnl: %d not parsed bytes\n", + __func__, rcv_len); + ret = -1; + goto out; + } + } +out: + close(fd); + + return ret; +} + +struct ovpn_link_req { + struct nlmsghdr n; + struct ifinfomsg i; + char buf[256]; +}; + +static int ovpn_new_iface(struct ovpn_ctx *ovpn) +{ + struct rtattr *linkinfo, *data; + struct ovpn_link_req req = { 0 }; + int ret = -1; + + fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname, + ovpn->mode); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL; + req.n.nlmsg_type = RTM_NEWLINK; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname, + strlen(ovpn->ifname) + 1) < 0) + goto err; + + linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO); + if (!linkinfo) + goto err; + + if 
(ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME, + strlen(OVPN_FAMILY_NAME) + 1) < 0) + goto err; + + if (ovpn->mode_set) { + data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA); + if (!data) + goto err; + + if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE, + &ovpn->mode, sizeof(uint8_t)) < 0) + goto err; + + ovpn_nest_end(&req.n, data); + } + + ovpn_nest_end(&req.n, linkinfo); + + req.i.ifi_family = AF_PACKET; + + ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +err: + return ret; +} + +static int ovpn_del_iface(struct ovpn_ctx *ovpn) +{ + struct ovpn_link_req req = { 0 }; + + fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname, + ovpn->ifindex); + + req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i)); + req.n.nlmsg_flags = NLM_F_REQUEST; + req.n.nlmsg_type = RTM_DELLINK; + + req.i.ifi_family = AF_PACKET; + req.i.ifi_index = ovpn->ifindex; + + return ovpn_rt_send(&req.n, 0, 0, NULL, NULL); +} + +static int nl_seq_check(struct nl_msg (*msg)__always_unused, + void (*arg)__always_unused) +{ + return NL_OK; +} + +struct mcast_handler_args { + const char *group; + int id; +}; + +static int mcast_family_handler(struct nl_msg *msg, void *arg) +{ + struct mcast_handler_args *grp = arg; + struct nlattr *tb[CTRL_ATTR_MAX + 1]; + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *mcgrp; + int rem_mcgrp; + + nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL); + + if (!tb[CTRL_ATTR_MCAST_GROUPS]) + return NL_SKIP; + + nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) { + struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1]; + + nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX, + nla_data(mcgrp), nla_len(mcgrp), NULL); + + if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] || + !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]) + continue; + if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]), + grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]))) + continue; + grp->id = 
nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]); + break; + } + + return NL_SKIP; +} + +static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused, + struct nlmsgerr *err, void *arg) +{ + int *ret = arg; + + *ret = err->error; + return NL_STOP; +} + +static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg) +{ + int *ret = arg; + + *ret = 0; + return NL_STOP; +} + +static int ovpn_handle_msg(struct nl_msg *msg, void *arg) +{ + struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg)); + struct nlattr *attrs[OVPN_A_MAX + 1]; + struct nlmsghdr *nlh = nlmsg_hdr(msg); + char ifname[IF_NAMESIZE]; + int *ret = arg; + __u32 ifindex; + + fprintf(stderr, "received message from ovpn-dco\n"); + + *ret = -1; + + if (!genlmsg_valid_hdr(nlh, 0)) { + fprintf(stderr, "invalid header\n"); + return NL_STOP; + } + + if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0), + genlmsg_attrlen(gnlh, 0), NULL)) { + fprintf(stderr, "received bogus data from ovpn-dco\n"); + return NL_STOP; + } + + if (!attrs[OVPN_A_IFINDEX]) { + fprintf(stderr, "no ifindex in this message\n"); + return NL_STOP; + } + + ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]); + if (!if_indextoname(ifindex, ifname)) { + fprintf(stderr, "cannot resolve ifname for ifindex: %u\n", + ifindex); + return NL_STOP; + } + + switch (gnlh->cmd) { + case OVPN_CMD_PEER_DEL_NTF: + fprintf(stdout, "received CMD_PEER_DEL_NTF\n"); + break; + case OVPN_CMD_KEY_SWAP_NTF: + fprintf(stdout, "received CMD_KEY_SWAP_NTF\n"); + break; + default: + fprintf(stderr, "received unknown command: %d\n", gnlh->cmd); + return NL_STOP; + } + + *ret = 0; + return NL_OK; +} + +static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family, + const char *group) +{ + struct nl_msg *msg; + struct nl_cb *cb; + int ret, ctrlid; + struct mcast_handler_args grp = { + .group = group, + .id = -ENOENT, + }; + + msg = nlmsg_alloc(); + if (!msg) + return -ENOMEM; + + cb = nl_cb_alloc(NL_CB_DEFAULT); + if (!cb) { + ret = -ENOMEM; + 
goto out_fail_cb; + } + + ctrlid = genl_ctrl_resolve(sock, "nlctrl"); + + genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0); + + ret = -ENOBUFS; + NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family); + + ret = nl_send_auto_complete(sock, msg); + if (ret < 0) + goto nla_put_failure; + + ret = 1; + + nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret); + nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp); + + while (ret > 0) + nl_recvmsgs(sock, cb); + + if (ret == 0) + ret = grp.id; + nla_put_failure: + nl_cb_put(cb); + out_fail_cb: + nlmsg_free(msg); + return ret; +} + +static int ovpn_listen_mcast(void) +{ + struct nl_sock *sock; + struct nl_cb *cb; + int mcid, ret; + + sock = nl_socket_alloc(); + if (!sock) { + fprintf(stderr, "cannot allocate netlink socket\n"); + ret = -ENOMEM; + goto err_free; + } + + nl_socket_set_buffer_size(sock, 8192, 8192); + + ret = genl_connect(sock); + if (ret < 0) { + fprintf(stderr, "cannot connect to generic netlink: %s\n", + nl_geterror(ret)); + goto err_free; + } + + mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS); + if (mcid < 0) { + fprintf(stderr, "cannot get mcast group: %s\n", + nl_geterror(mcid)); + goto err_free; + } + + ret = nl_socket_add_membership(sock, mcid); + if (ret) { + fprintf(stderr, "failed to join mcast group: %d\n", ret); + goto err_free; + } + + ret = 1; + cb = nl_cb_alloc(NL_CB_DEFAULT); + nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL); + nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret); + nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret); + + while (ret == 1) { + int err = nl_recvmsgs(sock, cb); + + if (err < 0) { + fprintf(stderr, + "cannot receive netlink message: (%d) %s\n", + err, nl_geterror(-err)); + ret = -1; + break; + } + } + + nl_cb_put(cb); +err_free: + nl_socket_free(sock); + return ret; +} + +static void usage(const char *cmd) +{ + fprintf(stderr, 
+ "Usage %s <command> <iface> [arguments..]\n", + cmd); + fprintf(stderr, "where <command> can be one of the following\n\n"); + + fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tmode:\n"); + fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n"); + fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n"); + + fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + + fprintf(stderr, + "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: TCP port to listen to\n"); + fprintf(stderr, + "\tpeers_file: file containing one peer per line: Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n"); + fprintf(stderr, + "\tipv6: whether the socket should listen to the IPv6 wildcard address\n"); + + fprintf(stderr, + "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n"); + fprintf(stderr, "\traddr: peer IP address to connect to\n"); + fprintf(stderr, "\trport: peer TCP port to connect to\n"); + fprintf(stderr, + "\tkey_file: file containing the symmetric key for encryption\n"); + + fprintf(stderr, + "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeer_id: peer ID to be used in data packets to/from this peer\n"); + fprintf(stderr, "\traddr: peer IP address\n"); + fprintf(stderr, "\trport: peer UDP port\n"); + fprintf(stderr, "\tvpnaddr: peer VPN IP\n"); + + fprintf(stderr, + "* new_multi_peer <iface> <lport> <peers_file>: add multiple 
peers as listed in the file\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tlport: local UDP port to bind to\n"); + fprintf(stderr, + "\tpeers_file: text file containing one peer per line. Line format:\n"); + fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n"); + + fprintf(stderr, + "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, + "\tkeepalive_interval: interval for sending ping messages\n"); + fprintf(stderr, + "\tkeepalive_timeout: time after which a peer is timed out\n"); + + fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n"); + + fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to query. 
All peers are returned if omitted\n"); + + fprintf(stderr, + "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, + "\tpeer_id: peer ID of the peer to configure the key for\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + fprintf(stderr, "\tkey_id: an ID from 0 to 7\n"); + fprintf(stderr, + "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n"); + fprintf(stderr, + "\tkey_dir: key direction, must 0 on one host and 1 on the other\n"); + fprintf(stderr, "\tkey_file: file containing the pre-shared key\n"); + + fprintf(stderr, + "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n"); + + fprintf(stderr, + "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n"); + fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n"); + + fprintf(stderr, + "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n"); + fprintf(stderr, "\tiface: ovpn interface name\n"); + fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n"); + + fprintf(stderr, + "* listen_mcast: listen to ovpn netlink multicast messages\n"); +} + +static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host, + const char *service, const char *vpnip) +{ + int ret; + struct addrinfo *result; + struct addrinfo hints = { + .ai_family = ovpn->sa_family, + .ai_socktype = SOCK_DGRAM, + .ai_protocol = IPPROTO_UDP + }; + + if (host) { + ret = getaddrinfo(host, service, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on remote error: %s\n", + gai_strerror(ret)); + 
return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen); + } + + if (vpnip) { + ret = getaddrinfo(vpnip, NULL, &hints, &result); + if (ret) { + fprintf(stderr, "getaddrinfo on vpnip error: %s\n", + gai_strerror(ret)); + return -1; + } + + if (!(result->ai_family == AF_INET && + result->ai_addrlen == sizeof(struct sockaddr_in)) && + !(result->ai_family == AF_INET6 && + result->ai_addrlen == sizeof(struct sockaddr_in6))) { + ret = -EINVAL; + goto out; + } + + memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen); + ovpn->sa_family = result->ai_family; + + ovpn->peer_ip_set = true; + } + + ret = 0; +out: + freeaddrinfo(result); + return ret; +} + +static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id, + const char *raddr, const char *rport, + const char *vpnip) +{ + ovpn->peer_id = strtoul(peer_id, NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + return ovpn_parse_remote(ovpn, raddr, rport, vpnip); +} + +static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn) +{ + int slot = strtoul(arg, NULL, 10); + + if (errno == ERANGE || slot < 1 || slot > 2) { + fprintf(stderr, "key slot out of range\n"); + return -1; + } + + switch (slot) { + case 1: + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + break; + case 2: + ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY; + break; + } + + return 0; +} + +static int ovpn_send_tcp_data(int socket) +{ + uint16_t len = htons(1000); + uint8_t buf[1002]; + int ret; + + memcpy(buf, &len, sizeof(len)); + memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len)); + + ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + fprintf(stdout, "Sent %u bytes over TCP socket\n", ret); + + 
return ret > 0 ? 0 : ret; +} + +static int ovpn_recv_tcp_data(int socket) +{ + uint8_t buf[1002]; + uint16_t len; + int ret; + + ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL); + + if (ret < 2) { + fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret); + return ret; + } + + memcpy(&len, buf, sizeof(len)); + len = ntohs(len); + + fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n", + ret, len); + + return 0; +} + +static enum ovpn_cmd ovpn_parse_cmd(const char *cmd) +{ + if (!strcmp(cmd, "new_iface")) + return CMD_NEW_IFACE; + + if (!strcmp(cmd, "del_iface")) + return CMD_DEL_IFACE; + + if (!strcmp(cmd, "listen")) + return CMD_LISTEN; + + if (!strcmp(cmd, "connect")) + return CMD_CONNECT; + + if (!strcmp(cmd, "new_peer")) + return CMD_NEW_PEER; + + if (!strcmp(cmd, "new_multi_peer")) + return CMD_NEW_MULTI_PEER; + + if (!strcmp(cmd, "set_peer")) + return CMD_SET_PEER; + + if (!strcmp(cmd, "del_peer")) + return CMD_DEL_PEER; + + if (!strcmp(cmd, "get_peer")) + return CMD_GET_PEER; + + if (!strcmp(cmd, "new_key")) + return CMD_NEW_KEY; + + if (!strcmp(cmd, "del_key")) + return CMD_DEL_KEY; + + if (!strcmp(cmd, "get_key")) + return CMD_GET_KEY; + + if (!strcmp(cmd, "swap_keys")) + return CMD_SWAP_KEYS; + + if (!strcmp(cmd, "listen_mcast")) + return CMD_LISTEN_MCAST; + + return CMD_INVALID; +} + +/* Send process to background and waits for signal. + * + * This helper is called at the end of commands + * creating sockets, so that the latter stay alive + * along with the process that created them. 
+ * + * A signal is expected to be delivered in order to + * terminate the waiting processes + */ +static void ovpn_waitbg(void) +{ + daemon(1, 1); + pause(); +} + +static int ovpn_run_cmd(struct ovpn_ctx *ovpn) +{ + char peer_id[10], vpnip[INET6_ADDRSTRLEN], laddr[128], lport[10]; + char raddr[128], rport[10]; + int n, ret; + FILE *fp; + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + ret = ovpn_new_iface(ovpn); + break; + case CMD_DEL_IFACE: + ret = ovpn_del_iface(ovpn); + break; + case CMD_LISTEN: + ret = ovpn_listen(ovpn, ovpn->sa_family); + if (ret < 0) { + fprintf(stderr, "cannot listen on TCP socket\n"); + return ret; + } + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + int num_peers = 0; + + while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) { + struct ovpn_ctx peer_ctx = { 0 }; + + if (num_peers == MAX_PEERS) { + fprintf(stderr, "max peers reached!\n"); + return -E2BIG; + } + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.sa_family = ovpn->sa_family; + + peer_ctx.socket = ovpn_accept(ovpn); + if (peer_ctx.socket < 0) { + fprintf(stderr, "cannot accept connection!\n"); + return -1; + } + + /* store peer sockets to test TCP I/O */ + ovpn->cli_sockets[num_peers] = peer_ctx.socket; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL, + NULL, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, true); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s\n", + peer_id, vpnip); + return ret; + } + num_peers++; + } + + for (int i = 0; i < num_peers; i++) { + ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]); + if (ret < 0) + break; + } + ovpn_waitbg(); + break; + case CMD_CONNECT: + ret = ovpn_connect(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot connect TCP socket\n"); + return ret; + } + + ret = ovpn_new_peer(ovpn, true); + if (ret < 0) { + fprintf(stderr, "cannot add peer to 
VPN\n"); + close(ovpn->socket); + return ret; + } + + if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) { + ret = ovpn_new_key(ovpn); + if (ret < 0) { + fprintf(stderr, "cannot set key\n"); + return ret; + } + } + + ret = ovpn_send_tcp_data(ovpn->socket); + ovpn_waitbg(); + break; + case CMD_NEW_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + ret = ovpn_new_peer(ovpn, false); + ovpn_waitbg(); + break; + case CMD_NEW_MULTI_PEER: + ret = ovpn_udp_socket(ovpn, AF_INET6); + if (ret < 0) + return ret; + + fp = fopen(ovpn->peers_file, "r"); + if (!fp) { + fprintf(stderr, "cannot open file: %s\n", + ovpn->peers_file); + return -1; + } + + while ((n = fscanf(fp, "%s %s %s %s %s %s\n", peer_id, laddr, + lport, raddr, rport, vpnip)) == 6) { + struct ovpn_ctx peer_ctx = { 0 }; + + peer_ctx.ifindex = ovpn->ifindex; + peer_ctx.socket = ovpn->socket; + peer_ctx.sa_family = AF_UNSPEC; + + ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr, + rport, vpnip); + if (ret < 0) { + fprintf(stderr, "error while parsing line\n"); + return -1; + } + + ret = ovpn_new_peer(&peer_ctx, false); + if (ret < 0) { + fprintf(stderr, + "cannot add peer to VPN: %s %s %s %s\n", + peer_id, raddr, rport, vpnip); + return ret; + } + } + ovpn_waitbg(); + break; + case CMD_SET_PEER: + ret = ovpn_set_peer(ovpn); + break; + case CMD_DEL_PEER: + ret = ovpn_del_peer(ovpn); + break; + case CMD_GET_PEER: + if (ovpn->peer_id == PEER_ID_UNDEF) + fprintf(stderr, "List of peers connected to: %s\n", + ovpn->ifname); + + ret = ovpn_get_peer(ovpn); + break; + case CMD_NEW_KEY: + ret = ovpn_new_key(ovpn); + break; + case CMD_DEL_KEY: + ret = ovpn_del_key(ovpn); + break; + case CMD_GET_KEY: + ret = ovpn_get_key(ovpn); + break; + case CMD_SWAP_KEYS: + ret = ovpn_swap_keys(ovpn); + break; + case CMD_LISTEN_MCAST: + ret = ovpn_listen_mcast(); + break; + case CMD_INVALID: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[]) 
+{ + int ret; + + /* no args required for LISTEN_MCAST */ + if (ovpn->cmd == CMD_LISTEN_MCAST) + return 0; + + /* all commands need an ifname */ + if (argc < 3) + return -EINVAL; + + strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1); + ovpn->ifname[IFNAMSIZ - 1] = '\0'; + + /* all commands, except NEW_IFNAME, needs an ifindex */ + if (ovpn->cmd != CMD_NEW_IFACE) { + ovpn->ifindex = if_nametoindex(ovpn->ifname); + if (!ovpn->ifindex) { + fprintf(stderr, "cannot find interface: %s\n", + strerror(errno)); + return -1; + } + } + + switch (ovpn->cmd) { + case CMD_NEW_IFACE: + if (argc < 4) + break; + + if (!strcmp(argv[3], "P2P")) { + ovpn->mode = OVPN_MODE_P2P; + } else if (!strcmp(argv[3], "MP")) { + ovpn->mode = OVPN_MODE_MP; + } else { + fprintf(stderr, "Cannot parse iface mode: %s\n", + argv[3]); + return -1; + } + ovpn->mode_set = true; + break; + case CMD_DEL_IFACE: + break; + case CMD_LISTEN: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + + ovpn->sa_family = AF_INET; + if (argc > 5 && !strcmp(argv[5], "ipv6")) + ovpn->sa_family = AF_INET6; + break; + case CMD_CONNECT: + if (argc < 6) + return -EINVAL; + + ovpn->sa_family = AF_INET; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5], + NULL); + if (ret < 0) { + fprintf(stderr, "Cannot parse remote peer data\n"); + return -1; + } + + if (argc > 6) { + ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY; + ovpn->key_id = 0; + ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM; + ovpn->key_dir = KEY_DIR_OUT; + + ret = ovpn_parse_key(argv[6], ovpn); + if (ret) + return -1; + } + break; + case CMD_NEW_PEER: + if (argc < 7) + return -EINVAL; + + ovpn->lport = strtoul(argv[4], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + const char *vpnip = (argc > 7) ? 
argv[7] : NULL; + + ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6], + vpnip); + if (ret < 0) + return -1; + break; + case CMD_NEW_MULTI_PEER: + if (argc < 5) + return -EINVAL; + + ovpn->lport = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->lport > 65535) { + fprintf(stderr, "lport value out of range\n"); + return -1; + } + + ovpn->peers_file = argv[4]; + break; + case CMD_SET_PEER: + if (argc < 6) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ovpn->keepalive_interval = strtoul(argv[4], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + + ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, + "keepalive interval value out of range\n"); + return -1; + } + break; + case CMD_DEL_PEER: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_GET_PEER: + ovpn->peer_id = PEER_ID_UNDEF; + if (argc > 3) { + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + } + break; + case CMD_NEW_KEY: + if (argc < 9) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return -1; + + ovpn->key_id = strtoul(argv[5], NULL, 10); + if (errno == ERANGE || ovpn->key_id > 2) { + fprintf(stderr, "key ID out of range\n"); + return -1; + } + + ret = ovpn_parse_cipher(argv[6], ovpn); + if (ret < 0) + return -1; + + ret = ovpn_parse_key_direction(argv[7], ovpn); + if 
(ret < 0) + return -1; + + ret = ovpn_parse_key(argv[8], ovpn); + if (ret) + return -1; + break; + case CMD_DEL_KEY: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_GET_KEY: + if (argc < 5) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + + ret = ovpn_parse_key_slot(argv[4], ovpn); + if (ret) + return ret; + break; + case CMD_SWAP_KEYS: + if (argc < 4) + return -EINVAL; + + ovpn->peer_id = strtoul(argv[3], NULL, 10); + if (errno == ERANGE) { + fprintf(stderr, "peer ID value out of range\n"); + return -1; + } + break; + case CMD_LISTEN_MCAST: + break; + case CMD_INVALID: + break; + } + + return 0; +} + +int main(int argc, char *argv[]) +{ + struct ovpn_ctx ovpn; + int ret; + + if (argc < 2) { + usage(argv[0]); + return -1; + } + + memset(&ovpn, 0, sizeof(ovpn)); + ovpn.sa_family = AF_UNSPEC; + ovpn.cipher = OVPN_CIPHER_ALG_NONE; + + ovpn.cmd = ovpn_parse_cmd(argv[1]); + if (ovpn.cmd == CMD_INVALID) { + fprintf(stderr, "Error: unknown command.\n\n"); + usage(argv[0]); + return -1; + } + + ret = ovpn_parse_cmd_args(&ovpn, argc, argv); + if (ret < 0) { + fprintf(stderr, "Error: invalid arguments.\n\n"); + if (ret == -EINVAL) + usage(argv[0]); + return ret; + } + + ret = ovpn_run_cmd(&ovpn); + if (ret) + fprintf(stderr, "Cannot execute command: %s (%d)\n", + strerror(-ret), ret); + + return ret; +} diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt new file mode 100644 index 000000000000..d753eebe8716 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt @@ -0,0 +1,5 @@ +1 5.5.5.2 +2 5.5.5.3 +3 5.5.5.4 +4 5.5.5.5 +5 5.5.5.6 diff --git 
a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh new file mode 100755 index 000000000000..32504079a2b8 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +ALG="chachapoly" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh new file mode 100755 index 000000000000..093d44772ffd --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test-close-socket.sh diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh new file mode 100755 index 000000000000..5e48a8b67928 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. 
+# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) +done + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh new file mode 100755 index 000000000000..ba5d725e18b0 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-float.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +FLOAT="1" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-large-mtu.sh b/tools/testing/selftests/net/ovpn/test-large-mtu.sh new file mode 100755 index 000000000000..ce2a2cb64f72 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-large-mtu.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +MTU="1500" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh new file mode 100755 index 000000000000..ba3f1f315a34 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test-tcp.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2025 OpenVPN, Inc. 
+# +# Author: Antonio Quartulli <antonio@openvpn.net> + +PROTO="TCP" + +source test.sh diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh new file mode 100755 index 000000000000..e8acdc303307 --- /dev/null +++ b/tools/testing/selftests/net/ovpn/test.sh @@ -0,0 +1,117 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020-2025 OpenVPN, Inc. +# +# Author: Antonio Quartulli <antonio@openvpn.net> + +#set -x +set -e + +source ./common.sh + +cleanup + +modprobe -q ovpn || true + +for p in $(seq 0 ${NUM_PEERS}); do + create_ns ${p} +done + +for p in $(seq 0 ${NUM_PEERS}); do + setup_ns ${p} 5.5.5.$((${p} + 1))/24 ${MTU} +done + +for p in $(seq 0 ${NUM_PEERS}); do + add_peer ${p} +done + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120 + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120 +done + +sleep 1 + +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1)) + ip netns exec peer0 ping -qfc 500 -s 3000 -w 3 5.5.5.$((${p} + 1)) +done + +# ping LAN behind client 1 +ip netns exec peer0 ping -qfc 500 -w 3 ${LAN_IP} + +if [ "$FLOAT" == "1" ]; then + # make clients float.. 
+ for p in $(seq 1 ${NUM_PEERS}); do + ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p} + ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p} + done + for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1 + done +fi + +ip netns exec peer0 iperf3 -1 -s & +sleep 1 +ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1 + +echo "Adding secondary key and then swap:" +for p in $(seq 1 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key + ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key + ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p} +done + +sleep 1 + +echo "Querying all peers:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 +ip netns exec peer1 ${OVPN_CLI} get_peer tun1 + +echo "Querying peer 1:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1 + +echo "Querying non-existent peer 10:" +ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true + +echo "Deleting peer 1:" +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1 +ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1 + +echo "Querying keys:" +for p in $(seq 2 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2 +done + +echo "Deleting peer while sending traffic:" +(ip netns exec peer2 ping -qf -w 4 5.5.5.1)& +sleep 2 +ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2 +# following command fails in TCP mode +# (both ends get conn reset when one peer disconnects) +ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true + +echo "Deleting keys:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1 + ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2 +done + +echo "Setting timeout to 3s MP:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0 +done +# wait for peers to 
timeout +sleep 5 + +echo "Setting timeout to 3s P2P:" +for p in $(seq 3 ${NUM_PEERS}); do + ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3 +done +sleep 5 + +cleanup + +modprobe -r ovpn || true diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt new file mode 100644 index 000000000000..e9773ddf875c --- /dev/null +++ b/tools/testing/selftests/net/ovpn/udp_peers.txt @@ -0,0 +1,6 @@ +1 10.10.1.1 1 10.10.1.2 1 5.5.5.2 +2 10.10.2.1 1 10.10.2.2 1 5.5.5.3 +3 10.10.3.1 1 10.10.3.2 1 5.5.5.4 +4 fd00:0:0:4::1 1 fd00:0:0:4::2 1 5.5.5.5 +5 fd00:0:0:5::1 1 fd00:0:0:5::2 1 5.5.5.6 +6 fd00:0:0:6::1 1 fd00:0:0:6::2 1 5.5.5.7 diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile new file mode 100644 index 000000000000..ff54641493e9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := \ + defaults.sh \ + ksft_runner.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh \ +# end of TEST_INCLUDES + +TEST_PROGS := $(wildcard *.pkt) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config new file mode 100644 index 000000000000..c4a19a785521 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/config @@ -0,0 +1,11 @@ +CONFIG_HZ=1000 +CONFIG_HZ_1000=y +CONFIG_IPV6=y +CONFIG_NET_NS=y +CONFIG_NET_SCH_FIFO=y +CONFIG_NET_SCH_FQ=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYN_COOKIES=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_MD5SIG=y +CONFIG_TUN=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh new file mode 100755 index 000000000000..37edd3dc3b07 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Set standard production config values that relate 
to TCP behavior. + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP timestamps. +sysctl -q net.ipv4.tcp_timestamps=1 + +# TCP SYN(ACK) retry thresholds +sysctl -q net.ipv4.tcp_syn_retries=5 +sysctl -q net.ipv4.tcp_synack_retries=5 + +# TCP Forward RTO-Recovery, RFC 5682. +sysctl -q net.ipv4.tcp_frto=2 + +# TCP Selective Acknowledgements (SACK) +sysctl -q net.ipv4.tcp_sack=1 + +# TCP Duplicate Selective Acknowledgements (DSACK) +sysctl -q net.ipv4.tcp_dsack=1 + +# TCP FACK (Forward Acknowldgement) +sysctl -q net.ipv4.tcp_fack=0 + +# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery). +sysctl -q net.ipv4.tcp_reordering=3 + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP RACK and TLP. +sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1 + +# TCP method for deciding when to defer sending to accumulate big TSO packets. +sysctl -q net.ipv4.tcp_tso_win_divisor=3 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 +sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +sysctl -q net.ipv4.tcp_fastopen=0x3 +# Use TFO_COOKIE in ksft_runner.sh for this key. +sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 + +sysctl -q net.ipv4.tcp_syncookies=1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. 
+tc qdisc add dev tun0 root pfifo + diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh new file mode 100755 index 000000000000..b34e5cf0112e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" + +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D TFO_COOKIE=3021b9d889017eeb + -D TFO_COOKIE_ZERO=b7c12350a90dc8f5 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D TFO_COOKIE=c1d1e9742a47a9bc + -D TFO_COOKIE_ZERO=82af1a8f9a205c34 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) + +if [ $# -ne 1 ]; then + ktap_exit_fail_msg "usage: $0 <script>" + exit "$KSFT_FAIL" +fi +script="$(basename $1)" + +if [ -z "$(which packetdrill)" ]; then + ktap_skip_all "packetdrill not found in PATH" + exit "$KSFT_SKIP" +fi + +declare -a optargs +failfunc=ktap_test_fail + +if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then + optargs+=('--tolerance_usecs=14000') + failfunc=ktap_test_xfail +fi + +ip_versions=$(grep -E '^--ip_version=' $script | cut -d '=' -f 2) +if [[ -z $ip_versions ]]; then + ip_versions="ipv4 ipv6" +elif [[ ! 
"$ip_versions" =~ ^ipv[46]$ ]]; then + ktap_exit_fail_msg "Too many or unsupported --ip_version: $ip_versions" + exit "$KSFT_FAIL" +fi + +ktap_print_header +ktap_set_plan $(echo $ip_versions | wc -w) + +for ip_version in $ip_versions; do + unshare -n packetdrill ${ip_args[$ip_version]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass $ip_version || $failfunc $ip_version +done + +ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py new file mode 100755 index 000000000000..5ddf456ae973 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Sets sysctl values and writes a file that restores them. + +The arguments are of the form "<proc-file>=<val>" separated by spaces. +The program first reads the current value of the proc-file and creates +a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which +restores the values when executed. It then sets the new values. + +PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt +file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh` +at the end. 
+""" + +import os +import subprocess +import sys + +filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID'] + +# Open file for restoring sysctl values +restore_file = open(filename, 'w') +print('#!/bin/bash', file=restore_file) + +for a in sys.argv[1:]: + sysctl = a.split('=') + # sysctl[0] contains the proc-file name, sysctl[1] the new value + + # read current value and add restore command to file + cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True) + print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file) + + # set new value + cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0]) + os.system(cmd) + +os.system('chmod u+x %s' % filename) diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..657e42ca65b5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. + +--tolerance_usecs=10000 +--mss=1000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 
1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decides to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). + +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7> + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) 
= 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt new file mode 100644 index 000000000000..eef01d5f1118 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_no_rst.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +0 < . 1:1001(1000) ack 1 win 32792 + +0 > . 1:1(0) ack 1001 + +0 read(4, ..., 1000) = 1000 + +// resend the payload + a FIN + +0 < F. 1:1001(1000) ack 1 win 32792 +// Why do we have a delay and no dsack ? + +0~+.04 > . 1:1(0) ack 1002 + + +0 close(4) = 0 + +// According to RFC 2525, section 2.17 +// we should _not_ send an RST here, because there was no data to consume. + +0 > F. 1:1(0) ack 1002 diff --git a/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt new file mode 100644 index 000000000000..c790d0af635e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_dsack_mult.pkt @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test various DSACK (RFC 2883) behaviors. 
+ +--mss=1000 + +`./defaults.sh` + + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + +// First SACK range. + +0 < P. 1001:2001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop, nop, sack 1001:2001> + +// Check SACK coalescing (contiguous sequence). + +0 < P. 2001:3001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 1001:3001> + +// Check we have two SACK ranges for non contiguous sequences. + +0 < P. 4001:5001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 4001:5001 1001:3001> + +// Three ranges. + +0 < P. 7001:8001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 1 <nop,nop,sack 7001:8001 4001:5001 1001:3001> + +// DSACK (1001:3001) + SACK (6001:7001) + +0 < P. 1:6001(6000) ack 1 win 1024 + +0 > . 1:1(0) ack 6001 <nop,nop,sack 1001:3001 7001:8001> + +// DSACK (7001:8001) + +0 < P. 6001:8001(2000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 7001:8001> + +// DSACK for an older segment. + +0 < P. 1:1001(1000) ack 1 win 1024 + +0 > . 1:1(0) ack 8001 <nop,nop,sack 1:1001> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). +// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) 
= 0 + + +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10400B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. This chunk should not be appended to the packets created +// for the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 
1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesced into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 
12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop> +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop> + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop> ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. 
+ +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. 
+ +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. 
+// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop> +// Enter fast recovery. + +0 > . 1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop> + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop> + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop> + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop> + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. 
+ +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. + +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop> + +0 > . 
12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop> + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop> + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop> + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop> + +0 > . 18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 20 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 
2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop> + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop> + +0 > . 
4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop> + +0 > . 7001:8001(1000) ack 1 + +0 > . 10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt new file mode 100644 index 000000000000..32aff9bc4052 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-cookie-not-reqd.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_COOKIE_NOT_REQD flag on receiving +// SYN with data but without Fast Open cookie option. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x202` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Since TFO_SERVER_COOKIE_NOT_REQD, a TFO socket will be created with +// the data accepted. + +0 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 read(4, ..., 1024) = 1000 + +// Data After SYN will be accepted too. + +0 < . 1001:2001(1000) ack 1 win 5840 + +0 > . 1:1(0) ack 2001 + +// Should change the implementation later to set the SYN flag as well. + +0 read(4, ..., 1024) = 1000 + +0 write(4, ..., 1000) = 1000 + +0 > P. 
1:1001(1000) ack 2001 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt new file mode 100644 index 000000000000..649997a58099 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-no-setsockopt.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO_SERVER_WO_SOCKOPT1 without setsockopt(TCP_FASTOPEN) + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x402` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt new file mode 100644 index 000000000000..4a00e0d994f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-non-tfo-listener.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Server w/o TCP_FASTOPEN socket option + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,FO TFO_COOKIE> + +// Data is ignored since TCP_FASTOPEN is not set on the listener + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// The above should block until ack comes in below. 
+ +0 < . 1:31(30) ack 1 win 5840 + +0 accept(3, ..., ...) = 4 + + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 read(4, ..., 512) = 30 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt new file mode 100644 index 000000000000..345ed26ff7f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-pure-syn-data.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test that TFO-enabled server would not respond SYN-ACK with any TFO option +// when receiving a pure SYN-data. It should respond a pure SYN-ack. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN with ECN disabled because this has happened in reality + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > S. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) 
= 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 + +// Test ECN-setup SYN w/ ECN enabled + +0 `sysctl -q net.ipv4.tcp_ecn=2` + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < SEW 999000:999040(40) win 32792 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 6> + +0 > SE. 1234:1234(0) ack 999001 <mss 1460,sackOK,TS val 100 ecr 100,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 100 + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt new file mode 100644 index 000000000000..98e6f84497cd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-rw.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test TFO server with SYN that has TFO cookie and data. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 + +0 write(4, ..., 100) = 100 + +0 > P. 
1:101(100) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt new file mode 100644 index 000000000000..95b1047ffdd5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_basic-zero-payload.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Basic TFO server test +// +// Test zero-payload packet w/ valid TFO cookie - a TFO socket will +// still be created and accepted but read() will not return until a +// later pkt with 10 bytes of data arrives. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +// A TFO socket is created and is writable. + +0 write(4, ..., 100) = 100 + +0 > P. 1:101(100) ack 1 + +0...0.300 read(4, ..., 512) = 10 + +.3 < P. 
1:11(10) ack 1 win 5840 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt new file mode 100644 index 000000000000..f75efd51ed0c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_client-ack-dropped-then-recovery-ms-timestamps.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// A reproducer case for a TFO SYNACK RTO undo bug in: +// 794200d66273 ("tcp: undo cwnd on Fast Open spurious SYNACK retransmit") +// The sequence that triggers this bug is: +// - Fast Open server receives TFO SYN with data, sends SYNACK +// - (client receives SYNACK and sends ACK, but ACK is lost) +// - server app sends some data packets +// - (N of the first data packets are lost) +// - server receives client ACK that has a TS ECR matching first SYNACK, +// and also SACKs suggesting the first N data packets were lost +// - server performs undo of SYNACK RTO, then immediately enters recovery +// - buggy behavior in 794200d66273 then performed an undo that caused +// the connection to be in a bad state, in CA_Open with retrans_out != 0 + +// Check that outbound TS Val ticks are as we would expect with 1000 usec per +// timestamp tick: +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:1000(1000) win 65535 <mss 1012,sackOK,TS val 1000 ecr 0,wscale 7,nop,nop,nop,FO TFO_COOKIE> + +0 > S. 0:0(0) ack 1001 <mss 1460,sackOK,TS val 2000 ecr 1000,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +// Application writes more data + +.010 write(4, ..., 10000) = 10000 + +0 > P. 1:5001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 > P. 
5001:10001(5000) ack 1001 <nop,nop,TS val 2010 ecr 1000> + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1010 ecr 2000,sack 2001:5001> + +0 > P. 1:2001(2000) ack 1001 <nop,nop,TS val 2010 ecr 1010> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% + + +0 < . 1001:1001(0) ack 1 win 257 <TS val 1011 ecr 2000,sack 2001:6001> + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt new file mode 100644 index 000000000000..c3cb0e8bdcf8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_experimental_option.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the Experimental Option +// +// SYN w/ FOEXP w/o cookie must generate SYN+ACK w/ FOEXP +// w/ a valid cookie, and the cookie must be the same as +// the one generated by IANA FO + +`./defaults.sh` + +// Request a TFO cookie by Experimental Option +// This must generate the same TFO_COOKIE + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,FOEXP TFO_COOKIE> + + +0 close(3) = 0 + +// Test if FOEXP with a valid cookie creates a TFO socket + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FOEXP TFO_COOKIE> + +0 > S. 
0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 read(4, ..., 512) = 10 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt new file mode 100644 index 000000000000..dc09f8d9a381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_fin-close-socket.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a FIN pkt with the ACK bit to a TFO socket. +// The socket will go to TCP_CLOSE_WAIT state and data can be +// read until the socket is closed, at which time a FIN will be sent. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// FIN is acked and the socket goes to TCP_CLOSE_WAIT state +// in tcp_fin() called from tcp_data_queue(). + +0 < F. 11:11(0) ack 1 win 32792 + +0 > . 1:1(0) ack 12 + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE_WAIT, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 close(4) = 0 + +0 > F. 1:1(0) ack 12 + * > F. 
1:1(0) ack 12 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt new file mode 100644 index 000000000000..d5543672e2bd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_icmp-before-accept.pkt @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send an ICMP host_unreachable pkt to a pending SYN_RECV req. +// +// If it's a TFO req, the ICMP error will cause it to switch +// to TCP_CLOSE state but remains in the acceptor queue. + +--ip_version=ipv4 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// Out-of-window icmp is ignored but accounted. + +0 `nstat > /dev/null` + +0 < icmp unreachable [5000:6000(1000)] + +0 `nstat | grep TcpExtOutOfWindowIcmps > /dev/null` + +// Valid ICMP unreach. + +0 < icmp unreachable host_unreachable [0:10(10)] + +// Unlike the non-TFO case, the req is still there to be accepted. + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +// tcp_done_with_error() in tcp_v4_err() sets sk->sk_state +// to TCP_CLOSE + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// The 1st read will succeed and return the data in SYN + +0 read(4, ..., 512) = 10 + +// The 2nd read will fail. + +0 read(4, ..., 512) = -1 EHOSTUNREACH (No route to host) + +// But is no longer writable because it's in TCP_CLOSE state. + +0 write(4, ..., 100) = -1 EPIPE (Broken Pipe) + +// inbound pkt will trigger RST because the socket has been moved +// off the TCP hash tables. + +0 < . 
1:1(0) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt new file mode 100644 index 000000000000..040d5547ed80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-after-accept.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it has been accepted. +// +// First read() will return all the data and this is consistent +// with the non-TFO case. Second read will return -1 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// 1st read will return the data from SYN. +// tcp_reset() sets sk->sk_err to ECONNRESET for SYN_RECV. + +0 < R. 11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 32792 +// See Step 2 in tcp_validate_incoming(). 
+ +// found_ok_skb in tcp_recvmsg_locked() + +0 read(4, ..., 512) = 10 + +// !copied && sk->sk_err -> sock_error(sk) + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt new file mode 100644 index 000000000000..7f9de6c66cbd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-before-accept.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket before it is accepted. +// +// The socket won't go away and after it's accepted the data +// in the SYN pkt can still be read. But that's about all that +// the acceptor can do with the socket. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +// 1st read will return the data from SYN. + +0 < R. 11:11(0) win 257 + +// This one w/o ACK bit will cause the same effect. +// +0 < R 11:11(0) win 257 + + +0 accept(3, ..., ...) 
= 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = -1 ECONNRESET (Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt new file mode 100644 index 000000000000..548a87701b5d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-close-with-unread-data.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a TFO socket after it is accepted. +// +// The socket will change to TCP_CLOSE state with pending data so +// write() will fail. Pending data can still be read and close() +// won't trigger RST if data is not read. +// +// 565b7b2d2e63 ("tcp: do not send reset to already closed sockets") +// https://lore.kernel.org/netdev/4C1A2502.1030502@openvz.org/ + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop, FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_done() sets sk->sk_state to TCP_CLOSE and clears tp->fastopen_rsk + +0 < R. 
11:11(0) win 32792 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt new file mode 100644 index 000000000000..20090bf77655 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_reset-non-tfo-socket.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Send a RST to a fully established socket with pending data before +// it is accepted. +// +// The socket with pending data won't go away and can still be accepted +// with data read. But it will be in TCP_CLOSE state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Invalid cookie, so accept() fails. + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO aaaaaaaaaaaaaaaa,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK, FO TFO_COOKIE,nop,nop> + + +0 accept(3, ..., ...) = -1 EAGAIN (Resource temporarily unavailable) + +// Complete 3WHS and send data and RST + +0 < . 1:1(0) ack 1 win 32792 + +0 < . 1:11(10) ack 1 win 32792 + +0 < R. 11:11(0) win 32792 + +// A valid reset won't make the fully-established socket go away. +// It's just that the acceptor will get a dead, unusable socket +// in TCP_CLOSE state. + +0 accept(3, ..., ...) 
= 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + + +0 write(4, ..., 100) = -1 ECONNRESET(Connection reset by peer) + +0 read(4, ..., 512) = 10 + +0 read(4, ..., 512) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt new file mode 100644 index 000000000000..9f52d7de3436 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_sockopt-fastopen-key.pkt @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test the server cookie is generated by aes64 encoding of remote and local +// IP addresses with a master key specified via sockopt TCP_FASTOPEN_KEY +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen_key=00000000-00000000-00000000-00000000` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +// Set a key of a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 (big endian). +// This would produce a cookie of TFO_COOKIE like many other +// tests (which use the same key but set via sysctl). + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Request a valid cookie TFO_COOKIE + +0 < S 1428932:1428942(10) win 10000 <mss 1012,nop,nop,FO,sackOK,TS val 1 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 1428933 <mss 1460,sackOK,TS val 10000 ecr 1,nop,wscale 8,FO TFO_COOKIE,nop,nop> + +0 < . 1:1(0) ack 1 win 257 <nop,nop,TS val 2 ecr 10000> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 10001 ecr 2> + +0 < F. 1:1(0) ack 2 win 257 <nop,nop,TS val 3 ecr 10001> + +0 > . 
2:2(0) ack 2 <nop,nop,TS val 10002 ecr 3> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test setting the key in the listen state, and produces an identical cookie + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, + "\xa1\xa1\xa1\xa1\xb2\xb2\xb2\xb2\xc3\xc3\xc3\xc3\xd4\xd4\xd4\xd4", 16) = 0 + + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 0:0(0) ack 6815001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 < . 1001:1001(0) ack 1 win 257 <nop,nop,TS val 12 ecr 10000> + +0 read(4, ..., 8192) = 1000 + + +0 close(4) = 0 + +0 > F. 1:1(0) ack 1001 <nop,nop,TS val 10101 ecr 12> + +0 < F. 1001:1001(0) ack 2 win 257 <nop,nop,TS val 112 ecr 10101> + +0 > . 2:2(0) ack 1002 <nop,nop,TS val 10102 ecr 112> + + +0 close(3) = 0 + +// Restart the listener + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + +// Test invalid key length (must be 16 bytes) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 0) = -1 (Invalid Argument) + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_KEY, "", 3) = -1 (Invalid Argument) + +// Previous cookie won't be accepted b/c this listener uses the global key (0-0-0-0) + +0 < S 6814000:6815000(1000) win 10000 <mss 1012,nop,nop,FO TFO_COOKIE,sackOK,TS val 10 ecr 0,nop,wscale 7> + +0 > S. 
0:0(0) ack 6814001 <mss 1460,sackOK,TS val 10000 ecr 10,nop,wscale 8,FO TFO_COOKIE_ZERO,nop,nop> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt new file mode 100644 index 000000000000..e82e06da44c9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-listener-closed.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a listener socket with pending TFO child. +// This will trigger RST pkt to go out. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RST pkt is generated for each not-yet-accepted TFO child. +// inet_csk_listen_stop() -> inet_child_forget() -> tcp_disconnect() +// -> tcp_need_reset() is true for SYN_RECV + +0 close(3) = 0 + +0 > R. 1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt new file mode 100644 index 000000000000..2a148bb14cbf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-reconnect.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,nop,nop,sackOK,nop,nop,FO TFO_COOKIE> + +0 > S. 
0:0(0) ack 11 win 65535 <mss 1460,nop,nop,sackOK> + +// sk->sk_state is TCP_SYN_RECV + +0 accept(3, ..., ...) = 4 + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// tcp_disconnect() sets sk->sk_state to TCP_CLOSE + +0 connect(4, AF_UNSPEC, ...) = 0 + +0 > R. 1:1(0) ack 11 win 65535 + +0 %{ assert tcpi_state == TCP_CLOSE, tcpi_state }% + +// connect() sets sk->sk_state to TCP_SYN_SENT + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation is now in progress) + +0 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 %{ assert tcpi_state == TCP_SYN_SENT, tcpi_state }% + +// tp->fastopen_rsk must be NULL + +1 > S 0:0(0) win 65535 <mss 1460,nop,nop,sackOK,nop,wscale 8> diff --git a/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt new file mode 100644 index 000000000000..09fb63f78a0e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fastopen_server_trigger-rst-unread-data-closed.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Close a TFO socket with unread data. +// This will trigger a RST pkt. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 32792 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + +0 %{ assert tcpi_state == TCP_SYN_RECV, tcpi_state }% + +// data_was_unread == true in __tcp_close() + +0 close(4) = 0 + +0 > R. 
1:1(0) ack 11 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt new file mode 100644 index 000000000000..e13f0eee9795 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the client side. + +--mss=1000 + +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Connect to the server and enable TCP_INQ. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700> + +// We read 1K and we should have 9K ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 1000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=9000}]}, 0) = 1000 +// We read 9K and we should have no further data ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 9000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 9000 + +// Server sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700> + +// We read 10K and we should have one "fake" byte because the connection is +// closed. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive EOF. + +0 read(3, ..., 2000) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt new file mode 100644 index 000000000000..14dd5f813d50 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the server side. + +--mss=1000 + +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + +// Accept the connection and enable TCP_INQ. + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0 + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 + +// We read 2K and we should have 8K ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=8000}]}, 0) = 2000 +// We read 8K and we should have no further data ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 8000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 8000 +// Client sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 + +// We read 10K and we should have one "fake" byte because the connection is +// closed. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive error. + +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected) diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 
14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 
14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt new file mode 100644 index 000000000000..25dfef95d3f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test what happens when client does not provide MD5 on SYN, +// but then does on the ACK that completes the three-way handshake. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Ooh, weird: client provides MD5 option on the ACK: + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +// The TCP listener refcount should be 2, but on buggy kernels it can be 0: + +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"` + +// Now here comes the legit ACK: + +.01 < . 1:1(0) ack 1 win 514 + +// Make sure the connection is OK: + +0 accept(3, ..., ...) 
= 4 + + +.01 write(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7> + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 
1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that the MSG_MORE flag correctly corks the tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 
1:1(0) ack 2801 win 257 +// Err... we release the remaining right after the ACK? note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < . 
1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt new file mode 100644 index 000000000000..09aabc775e80 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo-before-and-after-accept.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + +// Test that a not-yet-accepted socket does not change +// its initial sk_rcvbuf (tcp_rmem[1]) when receiving ooo packets. + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + +0 < . 1:1001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 1001 <nop,nop,sack 2001:101001> + +0 < . 1001:2001(1000) ack 1 win 257 + +0 > . 1:1(0) ack 101001 + + +0 accept(3, ..., ...) 
= 4 + + +0 %{ assert SK_MEMINFO_RCVBUF == 131072, SK_MEMINFO_RCVBUF }% + + +0 close(4) = 0 + +0 close(3) = 0 + +// Test that ooo packets for accepted sockets do increase sk_rcvbuf + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:41001(39000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:41001> + +0 < . 41001:101001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 1 <nop,nop,sack 2001:101001> + + +0 %{ assert SK_MEMINFO_RCVBUF > 131072, SK_MEMINFO_RCVBUF }% + diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt new file mode 100644 index 000000000000..7e6bc5fb0c8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:11001(9000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001> + +// check that ooo packet properly updates tcpi_rcv_mss + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% + + +0 < . 11001:21001(10000) ack 1 win 257 + +0 > . 
1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001> + diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..3848b419e68c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. 
++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt new file mode 100644 index 000000000000..f575c0ff89da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:20001(20000) ack 1 win 257 + +.04 > . 1:1(0) ack 20001 win 18000 + + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 20001 win 18000 + + +0 read(4, ..., 20000) = 20000 +// A too big packet is accepted if the receive queue is empty + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 80001 win 0 + diff --git a/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt new file mode 100644 index 000000000000..47550df124ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rto_synack_rto_max.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Test SYN+ACK RTX with 1s RTO. +// +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_rto_max_ms=1000` + +// +// Test 1: TFO SYN+ACK +// + 0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [1], 4) = 0 + + +0 < S 0:10(10) win 1000 <mss 1460,sackOK,nop,nop,FO TFO_COOKIE,nop,nop> + +0 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 11 <mss 1460,nop,nop,sackOK> + + +0 < . 11:11(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) != 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 + + +// +// Test 2: non-TFO SYN+ACK +// + +0 socket(..., SOCK_STREAM|SOCK_NONBLOCK, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 1000 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +// RTO must be capped to 1s + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +1 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + + +0 < . 1:1(0) ack 1 win 1000 <mss 1460,nop,nop,sackOK> + +0 accept(3, ..., ...) = 4 + +0 %{ assert (tcpi_options & TCPI_OPT_SYN_DATA) == 0, tcpi_options }% + + +0 close(4) = 0 + +0 close(3) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. + +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop> + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop> + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop> + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 
1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. 
+// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop> + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop> + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 
1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop> + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). 
+// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt new file mode 100644 index 000000000000..795c476d222d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every packet. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 
1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 1001 win 257 + +0 > P. 10001:12001(2000) ack 1 + + +0 < . 1:1(0) ack 2001 win 257 + +0 > P. 12001:14001(2000) ack 1 + ++.005 < . 1:1(0) ack 3001 win 257 + +0 > P. 14001:16001(2000) ack 1 + + +0 < . 1:1(0) ack 4001 win 257 + +0 > P. 16001:18001(2000) ack 1 + ++.005 < . 1:1(0) ack 5001 win 257 + +0 > P. 18001:20001(2000) ack 1 + + +0 < . 1:1(0) ack 6001 win 257 + +0 > P. 20001:22001(2000) ack 1 + ++.005 < . 1:1(0) ack 7001 win 257 + +0 > P. 22001:24001(2000) ack 1 + + +0 < . 1:1(0) ack 8001 win 257 + +0 > P. 24001:26001(2000) ack 1 + ++.005 < . 1:1(0) ack 9001 win 257 + +0 > P. 26001:28001(2000) ack 1 + + +0 < . 1:1(0) ack 10001 win 257 + +0 > P. 28001:30001(2000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt new file mode 100644 index 000000000000..9212ae1fd0f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, and not big enough to bump up cwnd. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 5 packets. + +0 write(4, ..., 5000) = 5000 + +0 > P. 
1:5001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 5001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt new file mode 100644 index 000000000000..416c901ddf51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, but still big enough that in slow +// start we want to increase our cwnd a little. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 6 packets. + +0 write(4, ..., 6000) = 6000 + +0 > P. 1:6001(6000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 
1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt new file mode 100644 index 000000000000..a894b7d4559c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 > P. 10001:14001(4000) ack 1 + ++.005 < . 1:1(0) ack 4001 win 257 + +0 > P. 14001:18001(4000) ack 1 + ++.005 < . 1:1(0) ack 6001 win 257 + +0 > P. 18001:22001(4000) ack 1 + ++.005 < . 1:1(0) ack 8001 win 257 + +0 > P. 22001:26001(4000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 
26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt new file mode 100644 index 000000000000..065fae9e9abd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver sends one ACK per 4 packets. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.11 < . 1:1(0) ack 4001 win 257 + +0 > P. 10001:18001(8000) ack 1 + + +.01 < . 1:1(0) ack 8001 win 257 + +0 > P. 18001:26001(8000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 
26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt new file mode 100644 index 000000000000..11b213be1138 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after idle +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 511 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +2 write(4, ..., 20000) = 20000 +// If slow start after idle works properly, we should send 5 MSS here (cwnd/2) + +0 > P. 
26001:31001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt new file mode 100644 index 000000000000..577ed8c8852c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after window update +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 0 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +0 write(4, ..., 20000) = 20000 +// 1st win0 probe ++.3~+.310 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 2nd win0 probe ++.6~+.620 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 3rd win0 probe ++1.2~+1.240 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +.9 < . 1:1(0) ack 26001 win 511 + +0 > P. 
26001:31001(5000) ack 1 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt new file mode 100644 index 000000000000..869f32c35a2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we don't fully use our cwnd but instead +// send just 9 packets, then cwnd should grow to twice that +// value, or 18 packets. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 9000) = 9000 + +0 > P. 1:9001(9000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 
1:1(0) ack 9001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt new file mode 100644 index 000000000000..0f77b7955db6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we send exactly 10 packets then cwnd should grow to 20. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 
1:1(0) ack 10001 win 257 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt new file mode 100644 index 000000000000..7e9c83d617c2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow, even if TSQ triggers. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Note we use FQ/pacing to check if TCP Small Queues is not hurting + +`./defaults.sh +tc qdisc replace dev tun0 root fq +sysctl -q net/ipv4/tcp_pacing_ss_ratio=200 +sysctl -e -q net.ipv4.tcp_min_tso_segs=2` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 500 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 40000) = 40000 +// This might change if we cook the initial packet with 10 MSS. + +0 > P. 1:2921(2920) ack 1 + +0 > P. 2921:5841(2920) ack 1 + +0 > P. 5841:8761(2920) ack 1 + +0 > P. 8761:11681(2920) ack 1 + +0 > P. 11681:14601(2920) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2921 win 500 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + +// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts" +// FQ notices that this packet missed the 'time to send next packet' computed +// when prior packet (11681:14601(2920)) was sent. 
+// So FQ will allow following packet to be sent a bit earlier (quantum/2) +// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit) + +0 > P. 14601:17521(2920) ack 1 + ++.003 < . 1:1(0) ack 5841 win 500 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.001 > P. 17521:20441(2920) ack 1 + ++.001 < . 1:1(0) ack 8761 win 500 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + +// remaining packets are delivered at a constant rate. ++.007 > P. 20441:23361(2920) ack 1 + ++.002 < . 1:1(0) ack 11681 win 500 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% ++.001 < . 1:1(0) ack 14601 win 500 + ++.004 > P. 23361:26281(2920) ack 1 + ++.007 > P. 26281:29201(2920) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 
1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop> ++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop> ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) 
= 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..454441e7ecff --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 
1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) 
= -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK> + + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 
1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 
1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK> + +0 > . 
1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 
1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700> + +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700> + +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200> + +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800> + +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200> + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. 
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after when SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100> + + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 
1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..6882b8240a8a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 <mss 1460> + +0 > S. 0:0(0) ack 1 <mss 1460> + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. 
+ // First verify that the connection is alive: ++3 write(4, ..., 24) = 24 + + // Now verify that shortly after that the socket is dead: ++1 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 
1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt new file mode 100644 index 000000000000..0a0700afdaa3 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// basic zerocopy test: +// +// send a packet with MSG_ZEROCOPY and receive the notification ID +// repeat and verify IDs are consecutive + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt new file mode 100644 index 000000000000..df91675d2991 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// batch zerocopy test: +// +// send multiple packets, then read one range of all notifications. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) 
= 4 + +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt new file mode 100644 index 000000000000..2963cfcb14df --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Minimal client-side zerocopy test + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 
1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt new file mode 100644 index 000000000000..ea0c2fa73c2d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// send with MSG_ZEROCOPY on a non-established socket +// +// verify that a send in state TCP_CLOSE correctly aborts the zerocopy +// operation, specifically it does not increment the zerocopy counter. +// +// First send on a closed socket and wait for (absent) notification. +// Then connect and send and verify that notification nr. is zero. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe) + + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 
1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt new file mode 100644 index 000000000000..4df978a9b82e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 
1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt new file mode 100644 index 000000000000..36b6edc4858c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. this tests verify that the same behavior is +// maintained when we have EPOLLEXCLUSIVE. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) 
= 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt new file mode 100644 index 000000000000..1bea6f3b4558 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// This is a test to confirm that EPOLLERR is only fired once for an FD when +// EPOLLONESHOT is set. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. 
confirm that EPOLLERR +// is correctly fired only once, when EPOLLONESHOT is set. send another packet +// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and +// confirm that EPOLLERR is correctly set. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive no EPOLLERR for the third send above. + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + +// rearm the FD and verify the EPOLLERR is fired again. 
+ +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt new file mode 100644 index 000000000000..e27c21ff5d18 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen client zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. +// +// Fastopen requires a stored cookie. Create two sockets. The first +// one will have no data in the initial send. On return 0 the +// zerocopy notification counter is not incremented. Verify this too. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + +// Send a FastOpen request, no cookie yet so no data in SYN + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop> + +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop> + +0 > . 
1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000> + +// Read from error queue: no zerocopy notification + +1 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +.01 close(3) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000> + +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001> + +// Send another Fastopen request, now SYN will have data + +.07 `sysctl -q net.ipv4.tcp_timestamps=0` + +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 + +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500 + +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop> + +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6> + +0 > . 501:501(0) ack 1 + +// Read from error queue: now has first zerocopy notification + +0.5 recvmsg(5, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt new file mode 100644 index 000000000000..b1fa77c77dfa --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen server zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. 
+ +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` + +// Set up a TFO server listening socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +// Client sends a SYN with data. + +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +// Server accepts and replies with data. ++.005 accept(3, ..., ...) = 4 + +0 read(4, ..., 1024) = 1000 + +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000 + +0 > P. 1:1001(1000) ack 1001 + +.05 < . 1001:1001(0) ack 1001 win 32792 + +// Read from error queue: now has first zerocopy notification + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt new file mode 100644 index 000000000000..2f5317d0a9fa --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0 +// tcp_MAX_SKB_FRAGS test +// +// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will +// 1) fit in a single packet without zerocopy +// 2) spill over into a second packet with zerocopy, +// because each iovec element becomes a frag +// 3) the PSH bit is set on an skb when it runs out of fragments + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + // Each pinned zerocopy page is fully accounted to skb->truesize. + // This test generates a worst case packet with each frag storing + // one byte, but increasing truesize with a page (64KB on PPC). + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send an iov of 18 elements: just becomes a linear skb + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, 0) = 18 + + +0 > P. 1:19(18) ack 1 + +0 < . 1:1(0) ack 19 win 257 + + // send a zerocopy iov of 18 elements: + +1 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 18 + + // verify that it is split in one skb of 17 frags + 1 of 1 frag + // verify that both have the PSH bit set + +0 > P. 19:36(17) ack 1 + +0 < . 1:1(0) ack 36 win 257 + + +0 > P. 36:37(1) ack 1 + +0 < . 
1:1(0) ack 37 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send a zerocopy iov of 64 elements: + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 64 + + // verify that it is split in skbs with 17 frags + +0 > P. 37:54(17) ack 1 + +0 < . 1:1(0) ack 54 win 257 + + +0 > P. 54:71(17) ack 1 + +0 < . 1:1(0) ack 71 win 257 + + +0 > P. 71:88(17) ack 1 + +0 < . 1:1(0) ack 88 win 257 + + +0 > P. 88:101(13) ack 1 + +0 < . 
1:1(0) ack 101 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt new file mode 100644 index 000000000000..9d5272c6b207 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +// small packet zerocopy test: +// +// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy +// packets of all sizes, including the smallest payload, 1B. + +--send_omit_free // do not reuse send buffers with zerocopy + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send 1B + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 1:2(1) ack 1 + +0 < . 1:1(0) ack 2 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send 1B again + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 2:3(1) ack 1 + +0 < . 
1:1(0) ack 3 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh index 5175c0c83a23..a3323c21f001 100755 --- a/tools/testing/selftests/net/pmtu.sh +++ b/tools/testing/selftests/net/pmtu.sh @@ -197,9 +197,14 @@ # # - pmtu_ipv6_route_change # Same as above but with IPv6 +# +# - pmtu_ipv4_mp_exceptions +# Use the same topology as in pmtu_ipv4, but add routeable addresses +# on host A and B on lo reachable via both routers. Host A and B +# addresses have multipath routes to each other, b_r1 mtu = 1500. +# Check that PMTU exceptions are created for both paths. source lib.sh -source net_helper.sh PAUSE_ON_FAIL=no VERBOSE=0 @@ -266,7 +271,8 @@ tests=" list_flush_ipv4_exception ipv4: list and flush cached exceptions 1 list_flush_ipv6_exception ipv6: list and flush cached exceptions 1 pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1 - pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1" + pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1 + pmtu_ipv4_mp_exceptions ipv4: PMTU multipath nh exceptions 1" # Addressing and routing for tests with routers: four network segments, with # index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an @@ -343,6 +349,9 @@ tunnel6_a_addr="fd00:2::a" tunnel6_b_addr="fd00:2::b" tunnel6_mask="64" +host4_a_addr="192.168.99.99" +host4_b_addr="192.168.88.88" + dummy6_0_prefix="fc00:1000::" dummy6_1_prefix="fc00:1001::" dummy6_mask="64" @@ -681,13 +690,7 @@ setup_xfrm() { } setup_nettest_xfrm() { - if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! 
which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - return 1 - fi - fi + check_gen_prog "nettest" [ ${1} -eq 6 ] && proto="-6" || proto="" port=${2} @@ -990,6 +993,52 @@ setup_ovs_bridge() { run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2 } +setup_multipath_new() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip nexthop add id 401 via ${prefix4}.${a_r1}.2 dev veth_A-R1 + run_cmd ${ns_a} ip nexthop add id 402 via ${prefix4}.${a_r2}.2 dev veth_A-R2 + run_cmd ${ns_a} ip nexthop add id 403 group 401/402 + run_cmd ${ns_a} ip route add ${host4_b_addr} src ${host4_a_addr} nhid 403 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip nexthop add id 401 via ${prefix4}.${b_r1}.2 dev veth_B-R1 + run_cmd ${ns_b} ip nexthop add id 402 via ${prefix4}.${b_r2}.2 dev veth_B-R2 + run_cmd ${ns_b} ip nexthop add id 403 group 401/402 + run_cmd ${ns_b} ip route add ${host4_a_addr} src ${host4_b_addr} nhid 403 +} + +setup_multipath_old() { + # Set up host A with multipath routes to host B host4_b_addr + run_cmd ${ns_a} ip addr add ${host4_a_addr} dev lo + run_cmd ${ns_a} ip route add ${host4_b_addr} \ + src ${host4_a_addr} \ + nexthop via ${prefix4}.${a_r1}.2 weight 1 \ + nexthop via ${prefix4}.${a_r2}.2 weight 1 + + # Set up host B with multipath routes to host A host4_a_addr + run_cmd ${ns_b} ip addr add ${host4_b_addr} dev lo + run_cmd ${ns_b} ip route add ${host4_a_addr} \ + src ${host4_b_addr} \ + nexthop via ${prefix4}.${b_r1}.2 weight 1 \ + nexthop via ${prefix4}.${b_r2}.2 weight 1 +} + +setup_multipath() { + if [ "$USE_NH" = "yes" ]; then + setup_multipath_new + else + setup_multipath_old + fi + + # Set up routers with routes to dummies + run_cmd ${ns_r1} ip route add ${host4_a_addr} via ${prefix4}.${a_r1}.1 + run_cmd ${ns_r2} ip route add 
${host4_a_addr} via ${prefix4}.${a_r2}.1 + run_cmd ${ns_r1} ip route add ${host4_b_addr} via ${prefix4}.${b_r1}.1 + run_cmd ${ns_r2} ip route add ${host4_b_addr} via ${prefix4}.${b_r2}.1 +} + setup() { [ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip @@ -1040,10 +1089,11 @@ cleanup() { cleanup_all_ns - ip link del veth_A-C 2>/dev/null - ip link del veth_A-R1 2>/dev/null - cleanup_del_ovs_internal - cleanup_del_ovs_vswitchd + [ -e "/sys/class/net/veth_A-C" ] && ip link del veth_A-C + [ -e "/sys/class/net/veth_A-R1" ] && ip link del veth_A-R1 + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_internal + [ -e "/sys/class/net/ovs_br0" ] && cleanup_del_ovs_vswitchd + rm -f "$tmpoutfile" } @@ -1082,23 +1132,15 @@ link_get_mtu() { } route_get_dst_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" - - if [ -z "${dsfield}" ]; then - dsfield=0 - fi + ns_cmd="${1}"; shift - ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}" + ${ns_cmd} ip route get "$@" } route_get_dst_pmtu_from_exception() { - ns_cmd="${1}" - dst="${2}" - dsfield="${3}" + ns_cmd="${1}"; shift - mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")" + mtu_parse "$(route_get_dst_exception "${ns_cmd}" "$@")" } check_pmtu_value() { @@ -1241,10 +1283,10 @@ test_pmtu_ipv4_dscp_icmp_exception() { run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1291,9 +1333,9 @@ 
test_pmtu_ipv4_dscp_udp_exception() { UDP:"${dst2}":50000,tos="${dsfield}" # Check that exceptions have been created with the correct PMTU - pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")" + pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" dsfield "${policy_mark}")" check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1 - pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")" + pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" dsfield "${policy_mark}")" check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1 } @@ -1447,7 +1489,7 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() { size=$(du -sb $tmpoutfile) size=${size%%/tmp/*} - [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1 + [ $size -ne 1048576 ] && err "File size $size mismatches expected value in locally bridged vxlan test" && return 1 done rm -f "$tmpoutfile" @@ -2062,7 +2104,7 @@ check_running() { pid=${1} cmd=${2} - [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ] + [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "${cmd}" ] } test_cleanup_vxlanX_exception() { @@ -2335,6 +2377,36 @@ test_pmtu_ipv6_route_change() { test_pmtu_ipvX_route_change 6 } +test_pmtu_ipv4_mp_exceptions() { + setup namespaces routing multipath || return $ksft_skip + + trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \ + "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \ + "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \ + "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2 + + # Set up initial MTU values + mtu "${ns_a}" veth_A-R1 2000 + mtu "${ns_r1}" veth_R1-A 2000 + mtu "${ns_r1}" veth_R1-B 1500 + mtu "${ns_b}" veth_B-R1 1500 + + mtu "${ns_a}" veth_A-R2 2000 + mtu "${ns_r2}" veth_R2-A 2000 + mtu "${ns_r2}" veth_R2-B 1500 + mtu "${ns_b}" veth_B-R2 1500 + + # Ping and expect two nexthop exceptions for two routes + run_cmd ${ns_a} ping -q -M want -i 
0.1 -c 1 -s 1800 "${host4_b_addr}" + + # Check that exceptions have been created with the correct PMTU + pmtu_a_R1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R1)" + pmtu_a_R2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${host4_b_addr}" oif veth_A-R2)" + + check_pmtu_value "1500" "${pmtu_a_R1}" "exceeding MTU (veth_A-R1)" || return 1 + check_pmtu_value "1500" "${pmtu_a_R2}" "exceeding MTU (veth_A-R2)" || return 1 +} + usage() { echo echo "$0 [OPTIONS] [TEST]..." diff --git a/tools/testing/selftests/net/proc_net_pktgen.c b/tools/testing/selftests/net/proc_net_pktgen.c new file mode 100644 index 000000000000..fab3b5c2e25d --- /dev/null +++ b/tools/testing/selftests/net/proc_net_pktgen.c @@ -0,0 +1,690 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * proc_net_pktgen: kselftest for /proc/net/pktgen interface + * + * Copyright (c) 2025 Peter Seiderer <ps.report@gmx.net> + * + */ +#include <errno.h> +#include <fcntl.h> +#include <stdlib.h> +#include <unistd.h> + +#include "kselftest_harness.h" + +static const char ctrl_cmd_stop[] = "stop"; +static const char ctrl_cmd_start[] = "start"; +static const char ctrl_cmd_reset[] = "reset"; + +static const char wrong_ctrl_cmd[] = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"; + +static const char thr_cmd_add_loopback_0[] = "add_device lo@0"; +static const char thr_cmd_rm_loopback_0[] = "rem_device_all"; + +static const char wrong_thr_cmd[] = "forsureawrongcommand"; +static const char legacy_thr_cmd[] = "max_before_softirq"; + +static const char wrong_dev_cmd[] = "forsurewrongcommand"; +static const char dev_cmd_min_pkt_size_0[] = "min_pkt_size"; +static const char dev_cmd_min_pkt_size_1[] = "min_pkt_size "; +static const char dev_cmd_min_pkt_size_2[] = "min_pkt_size 0"; +static const char dev_cmd_min_pkt_size_3[] = "min_pkt_size 1"; +static const char dev_cmd_min_pkt_size_4[] = "min_pkt_size 100"; +static 
const char dev_cmd_min_pkt_size_5[] = "min_pkt_size=1001"; +static const char dev_cmd_min_pkt_size_6[] = "min_pkt_size =2002"; +static const char dev_cmd_min_pkt_size_7[] = "min_pkt_size= 3003"; +static const char dev_cmd_min_pkt_size_8[] = "min_pkt_size = 4004"; +static const char dev_cmd_max_pkt_size_0[] = "max_pkt_size 200"; +static const char dev_cmd_pkt_size_0[] = "pkt_size 300"; +static const char dev_cmd_imix_weights_0[] = "imix_weights 0,7 576,4 1500,1"; +static const char dev_cmd_imix_weights_1[] = "imix_weights 101,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20"; +static const char dev_cmd_imix_weights_2[] = "imix_weights 100,1 102,2 103,3 104,4 105,5 106,6 107,7 108,8 109,9 110,10 111,11 112,12 113,13 114,14 115,15 116,16 117,17 118,18 119,19 120,20 121,21"; +static const char dev_cmd_imix_weights_3[] = "imix_weights"; +static const char dev_cmd_imix_weights_4[] = "imix_weights "; +static const char dev_cmd_imix_weights_5[] = "imix_weights 0"; +static const char dev_cmd_imix_weights_6[] = "imix_weights 0,"; +static const char dev_cmd_debug_0[] = "debug 1"; +static const char dev_cmd_debug_1[] = "debug 0"; +static const char dev_cmd_frags_0[] = "frags 100"; +static const char dev_cmd_delay_0[] = "delay 100"; +static const char dev_cmd_delay_1[] = "delay 2147483647"; +static const char dev_cmd_rate_0[] = "rate 0"; +static const char dev_cmd_rate_1[] = "rate 100"; +static const char dev_cmd_ratep_0[] = "ratep 0"; +static const char dev_cmd_ratep_1[] = "ratep 200"; +static const char dev_cmd_udp_src_min_0[] = "udp_src_min 1"; +static const char dev_cmd_udp_dst_min_0[] = "udp_dst_min 2"; +static const char dev_cmd_udp_src_max_0[] = "udp_src_max 3"; +static const char dev_cmd_udp_dst_max_0[] = "udp_dst_max 4"; +static const char dev_cmd_clone_skb_0[] = "clone_skb 1"; +static const char dev_cmd_clone_skb_1[] = "clone_skb 0"; +static const char dev_cmd_count_0[] = "count 100"; 
+static const char dev_cmd_src_mac_count_0[] = "src_mac_count 100"; +static const char dev_cmd_dst_mac_count_0[] = "dst_mac_count 100"; +static const char dev_cmd_burst_0[] = "burst 0"; +static const char dev_cmd_node_0[] = "node 100"; +static const char dev_cmd_xmit_mode_0[] = "xmit_mode start_xmit"; +static const char dev_cmd_xmit_mode_1[] = "xmit_mode netif_receive"; +static const char dev_cmd_xmit_mode_2[] = "xmit_mode queue_xmit"; +static const char dev_cmd_xmit_mode_3[] = "xmit_mode nonsense"; +static const char dev_cmd_flag_0[] = "flag UDPCSUM"; +static const char dev_cmd_flag_1[] = "flag !UDPCSUM"; +static const char dev_cmd_flag_2[] = "flag nonsense"; +static const char dev_cmd_dst_min_0[] = "dst_min 101.102.103.104"; +static const char dev_cmd_dst_0[] = "dst 101.102.103.104"; +static const char dev_cmd_dst_max_0[] = "dst_max 201.202.203.204"; +static const char dev_cmd_dst6_0[] = "dst6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_min_0[] = "dst6_min 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_dst6_max_0[] = "dst6_max 2001:db8:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src6_0[] = "src6 2001:db38:1234:0000:0000:0000:0000:0000"; +static const char dev_cmd_src_min_0[] = "src_min 101.102.103.104"; +static const char dev_cmd_src_max_0[] = "src_max 201.202.203.204"; +static const char dev_cmd_dst_mac_0[] = "dst_mac 01:02:03:04:05:06"; +static const char dev_cmd_src_mac_0[] = "src_mac 11:12:13:14:15:16"; +static const char dev_cmd_clear_counters_0[] = "clear_counters"; +static const char dev_cmd_flows_0[] = "flows 100"; +static const char dev_cmd_spi_0[] = "spi 100"; +static const char dev_cmd_flowlen_0[] = "flowlen 100"; +static const char dev_cmd_queue_map_min_0[] = "queue_map_min 1"; +static const char dev_cmd_queue_map_max_0[] = "queue_map_max 2"; +static const char dev_cmd_mpls_0[] = "mpls 00000001"; +static const char dev_cmd_mpls_1[] = "mpls 00000001,000000f2"; +static const char 
dev_cmd_mpls_2[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f"; +static const char dev_cmd_mpls_3[] = "mpls 00000f00,00000f01,00000f02,00000f03,00000f04,00000f05,00000f06,00000f07,00000f08,00000f09,00000f0a,00000f0b,00000f0c,00000f0d,00000f0e,00000f0f,00000f10"; +static const char dev_cmd_vlan_id_0[] = "vlan_id 1"; +static const char dev_cmd_vlan_p_0[] = "vlan_p 1"; +static const char dev_cmd_vlan_cfi_0[] = "vlan_cfi 1"; +static const char dev_cmd_vlan_id_1[] = "vlan_id 4096"; +static const char dev_cmd_svlan_id_0[] = "svlan_id 1"; +static const char dev_cmd_svlan_p_0[] = "svlan_p 1"; +static const char dev_cmd_svlan_cfi_0[] = "svlan_cfi 1"; +static const char dev_cmd_svlan_id_1[] = "svlan_id 4096"; +static const char dev_cmd_tos_0[] = "tos 0"; +static const char dev_cmd_tos_1[] = "tos 0f"; +static const char dev_cmd_tos_2[] = "tos 0ff"; +static const char dev_cmd_traffic_class_0[] = "traffic_class f0"; +static const char dev_cmd_skb_priority_0[] = "skb_priority 999"; + +FIXTURE(proc_net_pktgen) { + int ctrl_fd; + int thr_fd; + int dev_fd; +}; + +FIXTURE_SETUP(proc_net_pktgen) { + int r; + ssize_t len; + + r = system("modprobe pktgen"); + ASSERT_EQ(r, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->ctrl_fd = open("/proc/net/pktgen/pgctrl", O_RDWR); + ASSERT_GE(self->ctrl_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + self->thr_fd = open("/proc/net/pktgen/kpktgend_0", O_RDWR); + ASSERT_GE(self->thr_fd, 0) TH_LOG("CONFIG_NET_PKTGEN not enabled, module pktgen not loaded?"); + + len = write(self->thr_fd, thr_cmd_add_loopback_0, sizeof(thr_cmd_add_loopback_0)); + ASSERT_EQ(len, sizeof(thr_cmd_add_loopback_0)) TH_LOG("device lo@0 already registered?"); + + self->dev_fd = open("/proc/net/pktgen/lo@0", O_RDWR); + ASSERT_GE(self->dev_fd, 0) TH_LOG("device entry for lo@0 missing?"); +} + 
+FIXTURE_TEARDOWN(proc_net_pktgen) { + int ret; + ssize_t len; + + ret = close(self->dev_fd); + EXPECT_EQ(ret, 0); + + len = write(self->thr_fd, thr_cmd_rm_loopback_0, sizeof(thr_cmd_rm_loopback_0)); + EXPECT_EQ(len, sizeof(thr_cmd_rm_loopback_0)); + + ret = close(self->thr_fd); + EXPECT_EQ(ret, 0); + + ret = close(self->ctrl_fd); + EXPECT_EQ(ret, 0); +} + +TEST_F(proc_net_pktgen, wrong_ctrl_cmd) { + for (int i = 0; i <= sizeof(wrong_ctrl_cmd); i++) { + ssize_t len; + + len = write(self->ctrl_fd, wrong_ctrl_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, ctrl_cmd) { + ssize_t len; + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop)); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop)); + + len = write(self->ctrl_fd, ctrl_cmd_stop, sizeof(ctrl_cmd_stop) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_stop) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start)); + EXPECT_EQ(len, sizeof(ctrl_cmd_start)); + + len = write(self->ctrl_fd, ctrl_cmd_start, sizeof(ctrl_cmd_start) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_start) - 1); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset)); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset)); + + len = write(self->ctrl_fd, ctrl_cmd_reset, sizeof(ctrl_cmd_reset) - 1); + EXPECT_EQ(len, sizeof(ctrl_cmd_reset) - 1); +} + +TEST_F(proc_net_pktgen, wrong_thr_cmd) { + for (int i = 0; i <= sizeof(wrong_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, wrong_thr_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, legacy_thr_cmd) { + for (int i = 0; i <= sizeof(legacy_thr_cmd); i++) { + ssize_t len; + + len = write(self->thr_fd, legacy_thr_cmd, i); + if (i < (sizeof(legacy_thr_cmd) - 1)) { + /* incomplete command string */ + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } else { + /* complete command string without/with trailing '\0' */ + EXPECT_EQ(len, i); + } + } +} + +TEST_F(proc_net_pktgen, wrong_dev_cmd) { + for 
(int i = 0; i <= sizeof(wrong_dev_cmd); i++) { + ssize_t len; + + len = write(self->dev_fd, wrong_dev_cmd, i); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + } +} + +TEST_F(proc_net_pktgen, dev_cmd_min_pkt_size) { + ssize_t len; + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_0, sizeof(dev_cmd_min_pkt_size_0) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_0) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_1, sizeof(dev_cmd_min_pkt_size_1) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_1) - 1); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2)); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_min_pkt_size_2, sizeof(dev_cmd_min_pkt_size_2) - 1); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_2) - 1); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_3, sizeof(dev_cmd_min_pkt_size_3)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_3)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_4, sizeof(dev_cmd_min_pkt_size_4)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_4)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_5, sizeof(dev_cmd_min_pkt_size_5)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_5)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_6, sizeof(dev_cmd_min_pkt_size_6)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_6)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_7, sizeof(dev_cmd_min_pkt_size_7)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_7)); + + len = write(self->dev_fd, dev_cmd_min_pkt_size_8, 
sizeof(dev_cmd_min_pkt_size_8)); + EXPECT_EQ(len, sizeof(dev_cmd_min_pkt_size_8)); +} + +TEST_F(proc_net_pktgen, dev_cmd_max_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_max_pkt_size_0, sizeof(dev_cmd_max_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_max_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_pkt_size) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_pkt_size_0, sizeof(dev_cmd_pkt_size_0)); + EXPECT_EQ(len, sizeof(dev_cmd_pkt_size_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_imix_weights) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_imix_weights_0, sizeof(dev_cmd_imix_weights_0)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_0)); + + len = write(self->dev_fd, dev_cmd_imix_weights_1, sizeof(dev_cmd_imix_weights_1)); + EXPECT_EQ(len, sizeof(dev_cmd_imix_weights_1)); + + len = write(self->dev_fd, dev_cmd_imix_weights_2, sizeof(dev_cmd_imix_weights_2)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_3, sizeof(dev_cmd_imix_weights_3) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_4, sizeof(dev_cmd_imix_weights_4) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_5, sizeof(dev_cmd_imix_weights_5) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* with trailing '\0' */ + len = 
write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + /* without trailing '\0' */ + len = write(self->dev_fd, dev_cmd_imix_weights_6, sizeof(dev_cmd_imix_weights_6) - 1); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(proc_net_pktgen, dev_cmd_debug) { + ssize_t len; + + /* debug on */ + len = write(self->dev_fd, dev_cmd_debug_0, sizeof(dev_cmd_debug_0)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_0)); + + /* debug off */ + len = write(self->dev_fd, dev_cmd_debug_1, sizeof(dev_cmd_debug_1)); + EXPECT_EQ(len, sizeof(dev_cmd_debug_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_frags) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_frags_0, sizeof(dev_cmd_frags_0)); + EXPECT_EQ(len, sizeof(dev_cmd_frags_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_delay) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_delay_0, sizeof(dev_cmd_delay_0)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_0)); + + len = write(self->dev_fd, dev_cmd_delay_1, sizeof(dev_cmd_delay_1)); + EXPECT_EQ(len, sizeof(dev_cmd_delay_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_rate) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_rate_0, sizeof(dev_cmd_rate_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_rate_1, sizeof(dev_cmd_rate_1)); + EXPECT_EQ(len, sizeof(dev_cmd_rate_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_ratep) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_ratep_0, sizeof(dev_cmd_ratep_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EINVAL); + + len = write(self->dev_fd, dev_cmd_ratep_1, sizeof(dev_cmd_ratep_1)); + EXPECT_EQ(len, sizeof(dev_cmd_ratep_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_min_0, sizeof(dev_cmd_udp_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_min) { + ssize_t len; + + len = write(self->dev_fd, 
dev_cmd_udp_dst_min_0, sizeof(dev_cmd_udp_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_src_max_0, sizeof(dev_cmd_udp_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_udp_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_udp_dst_max_0, sizeof(dev_cmd_udp_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_udp_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clone_skb) { + ssize_t len; + + /* clone_skb on (gives EOPNOTSUPP on lo device) */ + len = write(self->dev_fd, dev_cmd_clone_skb_0, sizeof(dev_cmd_clone_skb_0)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + /* clone_skb off */ + len = write(self->dev_fd, dev_cmd_clone_skb_1, sizeof(dev_cmd_clone_skb_1)); + EXPECT_EQ(len, sizeof(dev_cmd_clone_skb_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_count_0, sizeof(dev_cmd_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_count_0, sizeof(dev_cmd_src_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac_count) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_count_0, sizeof(dev_cmd_dst_mac_count_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_count_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_burst) { + ssize_t len; + + /* burst off */ + len = write(self->dev_fd, dev_cmd_burst_0, sizeof(dev_cmd_burst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_burst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_node) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_node_0, sizeof(dev_cmd_node_0)); + EXPECT_EQ(len, sizeof(dev_cmd_node_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_xmit_mode) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_xmit_mode_0, 
sizeof(dev_cmd_xmit_mode_0)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_0)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_1, sizeof(dev_cmd_xmit_mode_1)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_1)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_2, sizeof(dev_cmd_xmit_mode_2)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_2)); + + len = write(self->dev_fd, dev_cmd_xmit_mode_3, sizeof(dev_cmd_xmit_mode_3)); + EXPECT_EQ(len, sizeof(dev_cmd_xmit_mode_3)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flag) { + ssize_t len; + + /* flag UDPCSUM on */ + len = write(self->dev_fd, dev_cmd_flag_0, sizeof(dev_cmd_flag_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_0)); + + /* flag UDPCSUM off */ + len = write(self->dev_fd, dev_cmd_flag_1, sizeof(dev_cmd_flag_1)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_1)); + + /* flag invalid */ + len = write(self->dev_fd, dev_cmd_flag_2, sizeof(dev_cmd_flag_2)); + EXPECT_EQ(len, sizeof(dev_cmd_flag_2)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_min_0, sizeof(dev_cmd_dst_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_0, sizeof(dev_cmd_dst_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_max_0, sizeof(dev_cmd_dst_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_0, sizeof(dev_cmd_dst6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_min_0, sizeof(dev_cmd_dst6_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst6_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst6_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst6_max_0, sizeof(dev_cmd_dst6_max_0)); + EXPECT_EQ(len, 
sizeof(dev_cmd_dst6_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src6) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src6_0, sizeof(dev_cmd_src6_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src6_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_min_0, sizeof(dev_cmd_src_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_max) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_max_0, sizeof(dev_cmd_src_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_dst_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_dst_mac_0, sizeof(dev_cmd_dst_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_dst_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_src_mac) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_src_mac_0, sizeof(dev_cmd_src_mac_0)); + EXPECT_EQ(len, sizeof(dev_cmd_src_mac_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_clear_counters) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_clear_counters_0, sizeof(dev_cmd_clear_counters_0)); + EXPECT_EQ(len, sizeof(dev_cmd_clear_counters_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_flows) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flows_0, sizeof(dev_cmd_flows_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flows_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_spi) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_spi_0, sizeof(dev_cmd_spi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_spi_0)) TH_LOG("CONFIG_XFRM not enabled?"); +} + +TEST_F(proc_net_pktgen, dev_cmd_flowlen) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_flowlen_0, sizeof(dev_cmd_flowlen_0)); + EXPECT_EQ(len, sizeof(dev_cmd_flowlen_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_min) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_min_0, sizeof(dev_cmd_queue_map_min_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_min_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_queue_map_max) { + 
ssize_t len; + + len = write(self->dev_fd, dev_cmd_queue_map_max_0, sizeof(dev_cmd_queue_map_max_0)); + EXPECT_EQ(len, sizeof(dev_cmd_queue_map_max_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_mpls) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_mpls_0, sizeof(dev_cmd_mpls_0)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_0)); + + len = write(self->dev_fd, dev_cmd_mpls_1, sizeof(dev_cmd_mpls_1)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_1)); + + len = write(self->dev_fd, dev_cmd_mpls_2, sizeof(dev_cmd_mpls_2)); + EXPECT_EQ(len, sizeof(dev_cmd_mpls_2)); + + len = write(self->dev_fd, dev_cmd_mpls_3, sizeof(dev_cmd_mpls_3)); + EXPECT_EQ(len, -1); + EXPECT_EQ(errno, E2BIG); +} + +TEST_F(proc_net_pktgen, dev_cmd_vlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_vlan_id_0, sizeof(dev_cmd_vlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_0)); + + len = write(self->dev_fd, dev_cmd_vlan_p_0, sizeof(dev_cmd_vlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_p_0)); + + len = write(self->dev_fd, dev_cmd_vlan_cfi_0, sizeof(dev_cmd_vlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_vlan_id_1, sizeof(dev_cmd_vlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_vlan_id_1)); +} + +TEST_F(proc_net_pktgen, dev_cmd_svlan_id) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_svlan_id_0, sizeof(dev_cmd_svlan_id_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_0)); + + len = write(self->dev_fd, dev_cmd_svlan_p_0, sizeof(dev_cmd_svlan_p_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_p_0)); + + len = write(self->dev_fd, dev_cmd_svlan_cfi_0, sizeof(dev_cmd_svlan_cfi_0)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_cfi_0)); + + len = write(self->dev_fd, dev_cmd_svlan_id_1, sizeof(dev_cmd_svlan_id_1)); + EXPECT_EQ(len, sizeof(dev_cmd_svlan_id_1)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_tos) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_tos_0, sizeof(dev_cmd_tos_0)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_0)); + + len = write(self->dev_fd, 
dev_cmd_tos_1, sizeof(dev_cmd_tos_1)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_1)); + + len = write(self->dev_fd, dev_cmd_tos_2, sizeof(dev_cmd_tos_2)); + EXPECT_EQ(len, sizeof(dev_cmd_tos_2)); +} + + +TEST_F(proc_net_pktgen, dev_cmd_traffic_class) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_traffic_class_0, sizeof(dev_cmd_traffic_class_0)); + EXPECT_EQ(len, sizeof(dev_cmd_traffic_class_0)); +} + +TEST_F(proc_net_pktgen, dev_cmd_skb_priority) { + ssize_t len; + + len = write(self->dev_fd, dev_cmd_skb_priority_0, sizeof(dev_cmd_skb_priority_0)); + EXPECT_EQ(len, sizeof(dev_cmd_skb_priority_0)); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 1a736f700be4..ab8d8b7e6cb0 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -48,17 +48,45 @@ #include <string.h> #include <sys/mman.h> #include <sys/socket.h> +#include <sys/ioctl.h> #include <sys/stat.h> #include <sys/types.h> #include <unistd.h> #include "psock_lib.h" -#include "../kselftest.h" +#include "kselftest.h" #define RING_NUM_FRAMES 20 static uint32_t cfg_max_num_members; +static void loopback_set_up_down(int state_up) +{ + struct ifreq ifreq = {}; + int fd, err; + + fd = socket(AF_PACKET, SOCK_RAW, 0); + if (fd < 0) { + perror("socket loopback"); + exit(1); + } + strcpy(ifreq.ifr_name, "lo"); + err = ioctl(fd, SIOCGIFFLAGS, &ifreq); + if (err) { + perror("SIOCGIFFLAGS"); + exit(1); + } + if (state_up != !!(ifreq.ifr_flags & IFF_UP)) { + ifreq.ifr_flags ^= IFF_UP; + err = ioctl(fd, SIOCSIFFLAGS, &ifreq); + if (err) { + perror("SIOCSIFFLAGS"); + exit(1); + } + } + close(fd); +} + /* Open a socket in a given fanout mode. 
* @return -1 if mode is bad, a valid socket otherwise */ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id) @@ -165,9 +193,9 @@ static void sock_fanout_set_ebpf(int fd) attr.insns = (unsigned long) prog; attr.insn_cnt = ARRAY_SIZE(prog); attr.license = (unsigned long) "GPL"; - attr.log_buf = (unsigned long) log_buf, - attr.log_size = sizeof(log_buf), - attr.log_level = 1, + attr.log_buf = (unsigned long) log_buf; + attr.log_size = sizeof(log_buf); + attr.log_level = 1; pfd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr)); if (pfd < 0) { @@ -251,6 +279,41 @@ static int sock_fanout_read(int fds[], char *rings[], const int expect[]) return 0; } +/* Test that creating/joining a fanout group fails for unbound socket without + * a specified protocol + */ +static void test_unbound_fanout(void) +{ + int val, fd0, fd1, err; + + fprintf(stderr, "test: unbound fanout\n"); + fd0 = socket(PF_PACKET, SOCK_RAW, 0); + if (fd0 < 0) { + perror("socket packet"); + exit(1); + } + /* Try to create a new fanout group. Should fail. */ + val = (PACKET_FANOUT_HASH << 16) | 1; + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout create\n"); + exit(1); + } + fd1 = sock_fanout_open(PACKET_FANOUT_HASH, 1); + if (fd1 == -1) { + fprintf(stderr, "ERROR: failed to open HASH socket\n"); + exit(1); + } + /* Try to join an existing fanout group. Should fail. 
*/ + err = setsockopt(fd0, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val)); + if (!err) { + fprintf(stderr, "ERROR: unbound socket fanout join\n"); + exit(1); + } + close(fd0); + close(fd1); +} + /* Test illegal mode + flag combination */ static void test_control_single(void) { @@ -264,17 +327,22 @@ static void test_control_single(void) } /* Test illegal group with different modes or flags */ -static void test_control_group(void) +static void test_control_group(int toggle) { int fds[2]; - fprintf(stderr, "test: control multiple sockets\n"); + if (toggle) + fprintf(stderr, "test: control multiple sockets with link down toggle\n"); + else + fprintf(stderr, "test: control multiple sockets\n"); fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0); if (fds[0] == -1) { fprintf(stderr, "ERROR: failed to open HASH socket\n"); exit(1); } + if (toggle) + loopback_set_up_down(0); if (sock_fanout_open(PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG, 0) != -1) { fprintf(stderr, "ERROR: joined group with wrong flag defrag\n"); @@ -294,6 +362,8 @@ static void test_control_group(void) fprintf(stderr, "ERROR: failed to join group\n"); exit(1); } + if (toggle) + loopback_set_up_down(1); if (close(fds[1]) || close(fds[0])) { fprintf(stderr, "ERROR: closing sockets\n"); exit(1); @@ -488,8 +558,10 @@ int main(int argc, char **argv) const int expect_uniqueid[2][2] = { { 20, 20}, { 20, 20 } }; int port_off = 2, tries = 20, ret; + test_unbound_fanout(); test_control_single(); - test_control_group(); + test_control_group(0); + test_control_group(1); test_control_group_max_num_members(); test_unique_fanout_group_ids(); diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h index 6e4fef560873..067265b0a554 100644 --- a/tools/testing/selftests/net/psock_lib.h +++ b/tools/testing/selftests/net/psock_lib.h @@ -22,10 +22,6 @@ #define PORT_BASE 8000 -#ifndef __maybe_unused -# define __maybe_unused __attribute__ ((__unused__)) -#endif - static __maybe_unused void 
pair_udp_setfilter(int fd) { /* the filter below checks for all of the following conditions that diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 404a2ce759ab..7caf3135448d 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -12,7 +12,7 @@ * * Datapath: * Open a pair of packet sockets and send resp. receive an a priori known - * packet pattern accross the sockets and check if it was received resp. + * packet pattern across the sockets and check if it was received resp. * sent correctly. Fanout in combination with RX_RING is currently not * tested here. * @@ -22,6 +22,7 @@ * - TPACKET_V3: RX_RING */ +#undef NDEBUG #include <stdio.h> #include <stdlib.h> #include <sys/types.h> @@ -33,7 +34,6 @@ #include <ctype.h> #include <fcntl.h> #include <unistd.h> -#include <bits/wordsize.h> #include <net/ethernet.h> #include <netinet/ip.h> #include <arpa/inet.h> @@ -46,7 +46,7 @@ #include "psock_lib.h" -#include "../kselftest.h" +#include "kselftest.h" #ifndef bug_on # define bug_on(cond) assert(!(cond)) @@ -785,7 +785,7 @@ static int test_kernel_bit_width(void) static int test_user_bit_width(void) { - return __WORDSIZE; + return sizeof(long) * 8; } static const char *tpacket_str[] = { diff --git a/tools/testing/selftests/net/rds/.gitignore b/tools/testing/selftests/net/rds/.gitignore new file mode 100644 index 000000000000..1c6f04e2aa11 --- /dev/null +++ b/tools/testing/selftests/net/rds/.gitignore @@ -0,0 +1 @@ +include.sh diff --git a/tools/testing/selftests/net/rds/Makefile b/tools/testing/selftests/net/rds/Makefile new file mode 100644 index 000000000000..762845cc973c --- /dev/null +++ b/tools/testing/selftests/net/rds/Makefile @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: GPL-2.0 + +all: + @echo mk_build_dir="$(shell pwd)" > include.sh + +TEST_PROGS := run.sh + +TEST_FILES := \ + include.sh \ + test.py \ +# end of TEST_FILES + +EXTRA_CLEAN := \ + include.sh \ 
+ /tmp/rds_logs \ +# end of EXTRA_CLEAN + +include ../../lib.mk diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt new file mode 100644 index 000000000000..cbde2951ab13 --- /dev/null +++ b/tools/testing/selftests/net/rds/README.txt @@ -0,0 +1,41 @@ +RDS self-tests +============== + +These scripts provide a coverage test for RDS-TCP by creating two +network namespaces and running rds packets between them. A loopback +network is provisioned with optional probability of packet loss or +corruption. A workload of 50000 hashes, each 64 characters in size, +is passed over an RDS socket on this test network. A passing test means +the RDS-TCP stack was able to recover properly. The provided config.sh +can be used to compile the kernel with the necessary gcov options. The +kernel may optionally be configured to omit the coverage report as well. + +USAGE: + run.sh [-d logdir] [-l packet_loss] [-c packet_corruption] + [-u packet_duplicate] + +OPTIONS: + -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs + + -l Simulates a percentage of packet loss + + -c Simulates a percentage of packet corruption + + -u Simulates a percentage of packet duplication. + +EXAMPLE: + + # Create a suitable gcov enabled .config + tools/testing/selftests/net/rds/config.sh -g + + # Alternatively, create a gcov disabled .config + tools/testing/selftests/net/rds/config.sh + + # build the kernel + vng --build --config tools/testing/selftests/net/config + + # launch the tests in a VM + vng -v --rwdir ./ --run . --user root --cpus 4 -- \ + "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh" + +An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/.
diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh new file mode 100755 index 000000000000..791c8dbe1095 --- /dev/null +++ b/tools/testing/selftests/net/rds/config.sh @@ -0,0 +1,53 @@ +#! /bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u +set -x + +unset KBUILD_OUTPUT + +GENERATE_GCOV_REPORT=0 +while getopts "g" opt; do + case ${opt} in + g) + GENERATE_GCOV_REPORT=1 + ;; + :) + echo "USAGE: config.sh [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + +CONF_FILE="tools/testing/selftests/net/config" + +# no modules +scripts/config --file "$CONF_FILE" --disable CONFIG_MODULES + +# enable RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS +scripts/config --file "$CONF_FILE" --enable CONFIG_RDS_TCP + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + # instrument RDS and only RDS + scripts/config --file "$CONF_FILE" --enable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --enable GCOV_PROFILE_RDS +else + scripts/config --file "$CONF_FILE" --disable CONFIG_GCOV_KERNEL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_ALL + scripts/config --file "$CONF_FILE" --disable GCOV_PROFILE_RDS +fi + +# need network namespaces to run tests with veth network interfaces +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_NS +scripts/config --file "$CONF_FILE" --enable CONFIG_VETH + +# simulate packet loss +scripts/config --file "$CONF_FILE" --enable CONFIG_NET_SCH_NETEM + diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh new file mode 100755 index 000000000000..8aee244f582a --- /dev/null +++ b/tools/testing/selftests/net/rds/run.sh @@ -0,0 +1,224 @@ +#! 
/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -u + +unset KBUILD_OUTPUT + +current_dir="$(realpath "$(dirname "$0")")" +build_dir="$current_dir" + +build_include="$current_dir/include.sh" +if test -f "$build_include"; then + # this include will define "$mk_build_dir" as the location the test was + # built. We will need this if the tests are installed in a location + # other than the kernel source + + source "$build_include" + build_dir="$mk_build_dir" +fi + +# This test requires kernel source and the *.gcda data therein +# Locate the top level of the kernel source, and the net/rds +# subfolder with the appropriate *.gcno object files +ksrc_dir="$(realpath "$build_dir"/../../../../../)" +kconfig="$ksrc_dir/.config" +obj_dir="$ksrc_dir/net/rds" + +GCOV_CMD=gcov + +#check to see if the host has the required packages to generate a gcov report +check_gcov_env() +{ + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find gcov. " + GENERATE_GCOV_REPORT=0 + return + fi + + # the gcov version must match the gcc version + GCC_VER=$(gcc -dumpfullversion) + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + #attempt to find a matching gcov version + GCOV_CMD=gcov-$(gcc -dumpversion) + + if ! which "$GCOV_CMD" > /dev/null 2>&1; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + return + fi + + #recheck version number of found gcov executable + GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \ + awk 'BEGIN {FS="-"}{print $1}') + if [ "$GCOV_VER" != "$GCC_VER" ]; then + echo "Warning: Could not find an appropriate gcov installation. \ + gcov version must match gcc version" + GENERATE_GCOV_REPORT=0 + else + echo "Warning: Mismatched gcc and gcov detected. 
Using $GCOV_CMD" + fi + fi +} + +# Check to see if the kconfig has the required configs to generate a coverage report +check_gcov_conf() +{ + if ! grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then + echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" + GENERATE_GCOV_REPORT=0 + fi + + if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then + echo "To enable gcov reports, please run "\ + "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel" + else + # if we have the required kernel configs, proceed to check the environment to + # ensure we have the required gcov packages + check_gcov_env + fi +} + +# Kselftest framework requirement - SKIP code is 4. +check_conf_enabled() { + if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 enabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf_disabled() { + if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] This test requires $1 disabled" + echo "Please run tools/testing/selftests/net/rds/config.sh and rebuild the kernel" + exit 4 + fi +} +check_conf() { + check_conf_enabled CONFIG_NET_SCH_NETEM + check_conf_enabled CONFIG_VETH + check_conf_enabled CONFIG_NET_NS + check_conf_enabled CONFIG_RDS_TCP + check_conf_enabled CONFIG_RDS + check_conf_disabled CONFIG_MODULES +} + +check_env() +{ + if ! test -d "$obj_dir"; then + echo "selftests: [SKIP] This test requires a kernel source tree" + exit 4 + fi + if ! 
test -e "$kconfig"; then + echo "selftests: [SKIP] This test requires a configured kernel source tree" + exit 4 + fi + if ! which strace > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without strace" + exit 4 + fi + if ! which tcpdump > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without tcpdump" + exit 4 + fi + + if ! which python3 > /dev/null 2>&1; then + echo "selftests: [SKIP] Could not run test without python3" + exit 4 + fi + + python_major=$(python3 -c "import sys; print(sys.version_info[0])") + python_minor=$(python3 -c "import sys; print(sys.version_info[1])") + if [[ python_major -lt 3 || ( python_major -eq 3 && python_minor -lt 9 ) ]] ; then + echo "selftests: [SKIP] Could not run test without at least python3.9" + python3 -V + exit 4 + fi +} + +LOG_DIR="$current_dir"/rds_logs +PLOSS=0 +PCORRUPT=0 +PDUP=0 +GENERATE_GCOV_REPORT=1 +while getopts "d:l:c:u:" opt; do + case ${opt} in + d) + LOG_DIR=${OPTARG} + ;; + l) + PLOSS=${OPTARG} + ;; + c) + PCORRUPT=${OPTARG} + ;; + u) + PDUP=${OPTARG} + ;; + :) + echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \ + "[-u packet_duplcate] [-g]" + exit 1 + ;; + ?) + echo "Invalid option: -${OPTARG}." + exit 1 + ;; + esac +done + + +check_env +check_conf +check_gcov_conf + + +rm -fr "$LOG_DIR" +TRACE_FILE="${LOG_DIR}/rds-strace.txt" +COVR_DIR="${LOG_DIR}/coverage/" +mkdir -p "$LOG_DIR" +mkdir -p "$COVR_DIR" + +set +e +echo running RDS tests... +echo Traces will be logged to "$TRACE_FILE" +rm -f "$TRACE_FILE" +strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" --timeout 400 -d "$LOG_DIR" \ + -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" + +test_rc=$? +dmesg > "${LOG_DIR}/dmesg.out" + +if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + echo saving coverage data... + (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \ + while read -r f + do + cat < "/sys/kernel/debug/gcov/$f" > "/$f" + done) + + echo running gcovr... 
+ gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \ + -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" +else + echo "Coverage report will be skipped" +fi + +if [ "$test_rc" -eq 0 ]; then + echo "PASS: Test completed successfully" +else + echo "FAIL: Test failed" +fi + +exit "$test_rc" diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py new file mode 100755 index 000000000000..4a7178d11193 --- /dev/null +++ b/tools/testing/selftests/net/rds/test.py @@ -0,0 +1,265 @@ +#! /usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import argparse +import ctypes +import errno +import hashlib +import os +import select +import signal +import socket +import subprocess +import sys +import atexit +from pwd import getpwuid +from os import stat + +# Allow utils module to be imported from different directory +this_dir = os.path.dirname(os.path.realpath(__file__)) +sys.path.append(os.path.join(this_dir, "../")) +from lib.py.utils import ip + +libc = ctypes.cdll.LoadLibrary('libc.so.6') +setns = libc.setns + +net0 = 'net0' +net1 = 'net1' + +veth0 = 'veth0' +veth1 = 'veth1' + +# Helper function for creating a socket inside a network namespace. +# We need this because otherwise RDS will detect that the two TCP +# sockets are on the same interface and use the loop transport instead +# of the TCP transport. 
+def netns_socket(netns, *args): + u0, u1 = socket.socketpair(socket.AF_UNIX, socket.SOCK_SEQPACKET) + + child = os.fork() + if child == 0: + # change network namespace + with open(f'/var/run/netns/{netns}') as f: + try: + ret = setns(f.fileno(), 0) + except IOError as e: + print(e.errno) + print(e) + + # create socket in target namespace + s = socket.socket(*args) + + # send resulting socket to parent + socket.send_fds(u0, [], [s.fileno()]) + + sys.exit(0) + + # receive socket from child + _, s, _, _ = socket.recv_fds(u1, 0, 1) + os.waitpid(child, 0) + u0.close() + u1.close() + return socket.fromfd(s[0], *args) + +def signal_handler(sig, frame): + print('Test timed out') + sys.exit(1) + +#Parse out command line arguments. We take an optional +# timeout parameter and an optional log output folder +parser = argparse.ArgumentParser(description="init script args", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-d", "--logdir", action="store", + help="directory to store logs", default="/tmp") +parser.add_argument('--timeout', help="timeout to terminate hung test", + type=int, default=0) +parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", + type=int, default=0) +parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", + type=int, default=0) +parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", + type=int, default=0) +args = parser.parse_args() +logdir=args.logdir +packet_loss=str(args.loss)+'%' +packet_corruption=str(args.corruption)+'%' +packet_duplicate=str(args.duplicate)+'%' + +ip(f"netns add {net0}") +ip(f"netns add {net1}") +ip(f"link add type veth") + +addrs = [ + # we technically don't need different port numbers, but this will + # help identify traffic in the network analyzer + ('10.0.0.1', 10000), + ('10.0.0.2', 20000), +] + +# move interfaces to separate namespaces so they can no longer be +# bound directly; this prevents rds from switching over from the tcp 
+# transport to the loop transport. +ip(f"link set {veth0} netns {net0} up") +ip(f"link set {veth1} netns {net1} up") + + + +# add addresses +ip(f"-n {net0} addr add {addrs[0][0]}/32 dev {veth0}") +ip(f"-n {net1} addr add {addrs[1][0]}/32 dev {veth1}") + +# add routes +ip(f"-n {net0} route add {addrs[1][0]}/32 dev {veth0}") +ip(f"-n {net1} route add {addrs[0][0]}/32 dev {veth1}") + +# sanity check that our two interfaces/addresses are correctly set up +# and communicating by doing a single ping +ip(f"netns exec {net0} ping -c 1 {addrs[1][0]}") + +# Start a packet capture on each network +for net in [net0, net1]: + tcpdump_pid = os.fork() + if tcpdump_pid == 0: + pcap = logdir+'/'+net+'.pcap' + subprocess.check_call(['touch', pcap]) + user = getpwuid(stat(pcap).st_uid).pw_name + ip(f"netns exec {net} /usr/sbin/tcpdump -Z {user} -i any -w {pcap}") + sys.exit(0) + +# simulate packet loss, duplication and corruption +for net, iface in [(net0, veth0), (net1, veth1)]: + ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {packet_corruption} loss {packet_loss} duplicate \ + {packet_duplicate}") + +# add a timeout +if args.timeout > 0: + signal.alarm(args.timeout) + signal.signal(signal.SIGALRM, signal_handler) + +sockets = [ + netns_socket(net0, socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(net1, socket.AF_RDS, socket.SOCK_SEQPACKET), +] + +for s, addr in zip(sockets, addrs): + s.bind(addr) + s.setblocking(0) + +fileno_to_socket = { + s.fileno(): s for s in sockets +} + +addr_to_socket = { + addr: s for addr, s in zip(addrs, sockets) +} + +socket_to_addr = { + s: addr for addr, s in zip(addrs, sockets) +} + +send_hashes = {} +recv_hashes = {} + +ep = select.epoll() + +for s in sockets: + ep.register(s, select.EPOLLRDNORM) + +n = 50000 +nr_send = 0 +nr_recv = 0 + +while nr_send < n: + # Send as much as we can without blocking + print("sending...", nr_send, nr_recv) + while nr_send < n: + send_data = hashlib.sha256( + f'packet 
{nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') + + # pseudo-random send/receive pattern + sender = sockets[nr_send % 2] + receiver = sockets[1 - (nr_send % 3) % 2] + + try: + sender.sendto(send_data, socket_to_addr[receiver]) + send_hashes.setdefault((sender.fileno(), receiver.fileno()), + hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8')) + nr_send = nr_send + 1 + except BlockingIOError as e: + break + except OSError as e: + if e.errno in [errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE]: + break + raise + + # Receive as much as we can without blocking + print("receiving...", nr_send, nr_recv) + while nr_recv < nr_send: + for fileno, eventmask in ep.poll(): + receiver = fileno_to_socket[fileno] + + if eventmask & select.EPOLLRDNORM: + while True: + try: + recv_data, address = receiver.recvfrom(1024) + sender = addr_to_socket[address] + recv_hashes.setdefault((sender.fileno(), + receiver.fileno()), hashlib.sha256()).update( + f'<{recv_data}>'.encode('utf-8')) + nr_recv = nr_recv + 1 + except BlockingIOError as e: + break + + # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() + for net in [net0, net1]: + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + +print("done", nr_send, nr_recv) + +# the Python socket module doesn't know these +RDS_INFO_FIRST = 10000 +RDS_INFO_LAST = 10017 + +nr_success = 0 +nr_error = 0 + +for s in sockets: + for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1): + # Sigh, the Python socket module doesn't allow us to pass + # buffer lengths greater than 1024 for some reason. RDS + # wants multiple pages. 
+ try: + s.getsockopt(socket.SOL_RDS, optname, 1024) + nr_success = nr_success + 1 + except OSError as e: + nr_error = nr_error + 1 + if e.errno == errno.ENOSPC: + # ignore + pass + +print(f"getsockopt(): {nr_success}/{nr_error}") + +print("Stopping network packet captures") +subprocess.check_call(['killall', '-q', 'tcpdump']) + +# We're done sending and receiving stuff, now let's check if what +# we received is what we sent. +for (sender, receiver), send_hash in send_hashes.items(): + recv_hash = recv_hashes.get((sender, receiver)) + + if recv_hash is None: + print("FAIL: No data received") + sys.exit(1) + + if send_hash.hexdigest() != recv_hash.hexdigest(): + print("FAIL: Send/recv mismatch") + print("hash expected:", send_hash.hexdigest()) + print("hash received:", recv_hash.hexdigest()) + sys.exit(1) + + print(f"{sender}/{receiver}: ok") + +print("Success") +sys.exit(0) diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c index 066efd30e294..5aad27a0d13a 100644 --- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -22,7 +22,7 @@ #include <sys/socket.h> #include <sys/types.h> #include <unistd.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" struct reuse_opts { int reuseaddr[2]; @@ -112,7 +112,7 @@ TEST(reuseaddr_ports_exhausted_reusable_same_euid) ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); if (opts->reuseport[0] && opts->reuseport[1]) { - EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets successfully listened."); } else { EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); } diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c index b8475cb29be7..1c43401a1c80 100644 --- 
a/tools/testing/selftests/net/reuseport_addr_any.c +++ b/tools/testing/selftests/net/reuseport_addr_any.c @@ -9,7 +9,6 @@ #include <arpa/inet.h> #include <errno.h> #include <error.h> -#include <linux/dccp.h> #include <linux/in.h> #include <linux/unistd.h> #include <stdbool.h> @@ -21,10 +20,6 @@ #include <sys/socket.h> #include <unistd.h> -#ifndef SOL_DCCP -#define SOL_DCCP 269 -#endif - static const char *IP4_ADDR = "127.0.0.1"; static const char *IP6_ADDR = "::1"; static const char *IP4_MAPPED6 = "::ffff:127.0.0.1"; @@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count, if (proto == SOCK_STREAM && listen(rcv_fds[i], 10)) error(1, errno, "tcp: failed to listen on receive port"); - else if (proto == SOCK_DCCP) { - if (setsockopt(rcv_fds[i], SOL_DCCP, - DCCP_SOCKOPT_SERVICE, - &(int) {htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - - if (listen(rcv_fds[i], 10)) - error(1, errno, "dccp: failed to listen on receive port"); - } } } @@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto) if (fd < 0) error(1, errno, "failed to create send socket"); - if (proto == SOCK_DCCP && - setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, - &(int){htonl(42)}, sizeof(int))) - error(1, errno, "failed to setsockopt"); - if (bind(fd, saddr, sz)) error(1, errno, "failed to bind send socket"); @@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto) if (i < 0) error(1, errno, "epoll_wait failed"); - if (proto == SOCK_STREAM || proto == SOCK_DCCP) { + if (proto == SOCK_STREAM) { fd = accept(ev.data.fd, NULL, NULL); if (fd < 0) error(1, errno, "failed to accept"); @@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto, static void test_proto(int proto, const char *proto_str) { - if (proto == SOCK_DCCP) { - int test_fd; - - test_fd = socket(AF_INET, proto, 0); - if (test_fd < 0) { - if (errno == ESOCKTNOSUPPORT) { - fprintf(stderr, "DCCP not supported: skipping DCCP tests\n"); - 
return; - } else - error(1, errno, "failed to create a DCCP socket"); - } - close(test_fd); - } - fprintf(stderr, "%s IPv4 ... ", proto_str); run_one_test(AF_INET, AF_INET, proto, IP4_ADDR); @@ -271,7 +238,6 @@ int main(void) { test_proto(SOCK_DGRAM, "UDP"); test_proto(SOCK_STREAM, "TCP"); - test_proto(SOCK_DCCP, "DCCP"); fprintf(stderr, "SUCCESS\n"); return 0; diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c index 65aea27d761c..b6634d6da3d6 100644 --- a/tools/testing/selftests/net/reuseport_bpf.c +++ b/tools/testing/selftests/net/reuseport_bpf.c @@ -24,7 +24,7 @@ #include <sys/resource.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" struct test_params { int recv_family; diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c index c9ba36aa688e..2ffd957ffb15 100644 --- a/tools/testing/selftests/net/reuseport_bpf_numa.c +++ b/tools/testing/selftests/net/reuseport_bpf_numa.c @@ -23,7 +23,7 @@ #include <unistd.h> #include <numa.h> -#include "../kselftest.h" +#include "kselftest.h" static const int PORT = 8888; diff --git a/tools/testing/selftests/net/route_hint.sh b/tools/testing/selftests/net/route_hint.sh new file mode 100755 index 000000000000..2db01ece0cc1 --- /dev/null +++ b/tools/testing/selftests/net/route_hint.sh @@ -0,0 +1,79 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test ensures directed broadcast routes use dst hint mechanism + +source lib.sh + +CLIENT_IP4="192.168.0.1" +SERVER_IP4="192.168.0.2" +BROADCAST_ADDRESS="192.168.0.255" + +setup() { + setup_ns CLIENT_NS SERVER_NS + + ip -net "${SERVER_NS}" link add link1 type veth peer name link0 netns "${CLIENT_NS}" + + ip -net "${CLIENT_NS}" link set link0 up + ip -net "${CLIENT_NS}" addr add "${CLIENT_IP4}/24" dev link0 + + ip -net "${SERVER_NS}" link set link1 up + ip -net "${SERVER_NS}" addr add "${SERVER_IP4}/24" dev link1 + + ip netns exec 
"${CLIENT_NS}" ethtool -K link0 tcp-segmentation-offload off + ip netns exec "${SERVER_NS}" sh -c "echo 500000000 > /sys/class/net/link1/gro_flush_timeout" + ip netns exec "${SERVER_NS}" sh -c "echo 1 > /sys/class/net/link1/napi_defer_hard_irqs" + ip netns exec "${SERVER_NS}" ethtool -K link1 generic-receive-offload on +} + +cleanup() { + ip -net "${SERVER_NS}" link del link1 + cleanup_ns "${CLIENT_NS}" "${SERVER_NS}" +} + +directed_bcast_hint_test() +{ + local rc=0 + + echo "Testing for directed broadcast route hint" + + orig_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + ip netns exec "${CLIENT_NS}" mausezahn link0 -a own -b bcast -A "${CLIENT_IP4}" \ + -B "${BROADCAST_ADDRESS}" -c1 -t tcp "sp=1-100,dp=1234,s=1,a=0" -p 5 -q + sleep 1 + new_in_brd=$(ip netns exec "${SERVER_NS}" lnstat -j -i1 -c1 | jq '.in_brd') + + res=$(echo "${new_in_brd} - ${orig_in_brd}" | bc) + + if [ "${res}" -lt 100 ]; then + echo "[ OK ]" + rc="${ksft_pass}" + else + echo "[FAIL] expected in_brd to be under 100, got ${res}" + rc="${ksft_fail}" + fi + + return "${rc}" +} + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v jq)" ]; then + echo "SKIP: Could not run test without jq tool" + exit "${ksft_skip}" +fi + +if [ ! -x "$(command -v bc)" ]; then + echo "SKIP: Could not run test without bc tool" + exit "${ksft_skip}" +fi + +trap cleanup EXIT + +setup + +directed_bcast_hint_test +exit $? 
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh index 4287a8529890..b200019b3c80 100755 --- a/tools/testing/selftests/net/rps_default_mask.sh +++ b/tools/testing/selftests/net/rps_default_mask.sh @@ -54,16 +54,16 @@ cleanup echo 1 > /proc/sys/net/core/rps_default_mask setup -chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask doesn't affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK echo 3 > /proc/sys/net/core/rps_default_mask -chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0 +chk_rps "changing rps_default_mask doesn't affect existing netns" $NETNS lo 0 ip link add name $VETH type veth peer netns $NETNS name $VETH ip link set dev $VETH up ip -n $NETNS link set dev $VETH up -chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3 -chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0 +chk_rps "changing rps_default_mask affects newly created devices" "" $VETH 3 +chk_rps "changing rps_default_mask doesn't affect newly child netns[II]" $NETNS $VETH 0 ip link del dev $VETH ip netns del $NETNS @@ -72,8 +72,8 @@ chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0 ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1 ip link add name $VETH type veth peer netns $NETNS name $VETH -chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK +chk_rps "changing rps_default_mask in child ns doesn't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1 -chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0 +chk_rps "changing rps_default_mask in child ns doesn't affect existing devices" $NETNS lo 0 exit $ret diff --git a/tools/testing/selftests/net/rtnetlink.py 
b/tools/testing/selftests/net/rtnetlink.py new file mode 100755 index 000000000000..e9ad5e88da97 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +from lib.py import ksft_exit, ksft_run, ksft_ge, RtnlAddrFamily +import socket + +IPV4_ALL_HOSTS_MULTICAST = b'\xe0\x00\x00\x01' + +def dump_mcaddr_check(rtnl: RtnlAddrFamily) -> None: + """ + Verify that at least one interface has the IPv4 all-hosts multicast address. + At least the loopback interface should have this address. + """ + + addresses = rtnl.getmulticast({"ifa-family": socket.AF_INET}, dump=True) + + all_host_multicasts = [ + addr for addr in addresses if addr['multicast'] == IPV4_ALL_HOSTS_MULTICAST + ] + + ksft_ge(len(all_host_multicasts), 1, + "No interface found with the IPv4 all-hosts multicast address") + +def main() -> None: + rtnl = RtnlAddrFamily() + ksft_run([dump_mcaddr_check], args=(rtnl, )) + ksft_exit() + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index bdf6f10d0558..248c2b91fe42 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -8,6 +8,7 @@ ALL_TESTS=" kci_test_polrouting kci_test_route_get kci_test_addrlft + kci_test_addrlft_route_cleanup kci_test_promote_secondaries kci_test_tc kci_test_gre @@ -21,14 +22,17 @@ ALL_TESTS=" kci_test_vrf kci_test_encap kci_test_macsec - kci_test_macsec_offload + kci_test_macsec_vlan kci_test_ipsec kci_test_ipsec_offload kci_test_fdb_get + kci_test_fdb_del kci_test_neigh_get kci_test_bridge_parent_id kci_test_address_proto kci_test_enslave_bonding + kci_test_mngtmpaddr + kci_test_operstate " devdummy="test-dummy0" @@ -44,6 +48,7 @@ check_err() if [ $ret -eq 0 ]; then ret=$1 fi + [ -n "$2" ] && echo "$2" } # same but inverted -- used when command must fail for test to pass @@ -289,6 +294,17 @@ kci_test_route_get() end_test "PASS: 
route get" } +check_addr_not_exist() +{ + dev=$1 + addr=$2 + if ip addr show dev $dev | grep -q $addr; then + return 1 + else + return 0 + fi +} + kci_test_addrlft() { for i in $(seq 10 100) ;do @@ -296,9 +312,10 @@ kci_test_addrlft() run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1)) done - sleep 5 - run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy" - if [ $? -eq 0 ]; then + slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." + if [ $? -eq 1 ]; then + # troubleshoot the reason for our failure + run_cmd ip addr show dev "$devdummy" check_err 1 end_test "FAIL: preferred_lft addresses remaining" return @@ -307,8 +324,32 @@ kci_test_addrlft() end_test "PASS: preferred_lft addresses have expired" } +kci_test_addrlft_route_cleanup() +{ + local ret=0 + local test_addr="2001:db8:99::1/64" + local test_prefix="2001:db8:99::/64" + + run_cmd ip -6 addr add $test_addr dev "$devdummy" valid_lft 300 preferred_lft 300 + run_cmd_grep "$test_prefix proto kernel" ip -6 route show dev "$devdummy" + run_cmd ip -6 addr del $test_addr dev "$devdummy" + run_cmd_grep_fail "$test_prefix" ip -6 route show dev "$devdummy" + + if [ $ret -ne 0 ]; then + end_test "FAIL: route not cleaned up when address with valid_lft deleted" + return 1 + fi + + end_test "PASS: route cleaned up when address with valid_lft deleted" +} + kci_test_promote_secondaries() { + run_cmd ifconfig "$devdummy" + if [ $ret -ne 0 ]; then + end_test "SKIP: ifconfig not installed" + return $ksft_skip + fi promote=$(sysctl -n net.ipv4.conf.$devdummy.promote_secondaries) sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=1 @@ -505,7 +546,7 @@ kci_test_encap_fou() run_cmd_fail ip -netns "$testns" fou del port 9999 run_cmd ip -netns "$testns" fou del port 7777 if [ $ret -ne 0 ]; then - end_test "FAIL: fou"s + end_test "FAIL: fou" return 1 fi @@ -559,71 +600,39 @@ kci_test_macsec() end_test "PASS: macsec" } -kci_test_macsec_offload() +# Test __dev_set_rx_mode call from 
dev_uc_add under addr_list_lock spinlock. +# Make sure __dev_set_promiscuity is not grabbing (sleeping) netdev instance +# lock. +# https://lore.kernel.org/netdev/2aff4342b0f5b1539c02ffd8df4c7e58dd9746e7.camel@nvidia.com/ +kci_test_macsec_vlan() { - sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/ - sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ - probed=false + msname="test_macsec1" + vlanname="test_vlan1" local ret=0 run_cmd_grep "^Usage: ip macsec" ip macsec help if [ $? -ne 0 ]; then end_test "SKIP: macsec: iproute2 too old" return $ksft_skip fi - - if ! mount | grep -q debugfs; then - mount -t debugfs none /sys/kernel/debug/ &> /dev/null - fi - - # setup netdevsim since dummydev doesn't have offload support - if [ ! -w /sys/bus/netdevsim/new_device ] ; then - run_cmd modprobe -q netdevsim - - if [ $ret -ne 0 ]; then - end_test "SKIP: macsec_offload can't load netdevsim" - return $ksft_skip - fi - probed=true - fi - - echo "0" > /sys/bus/netdevsim/new_device - while [ ! -d $sysfsnet ] ; do :; done - udevadm settle - dev=`ls $sysfsnet` - - ip link set $dev up - if [ ! -d $sysfsd ] ; then - end_test "FAIL: macsec_offload can't create device $dev" - return 1 - fi - run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev - if [ $? 
-eq 1 ] ; then - end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload" + run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on + if [ $ret -ne 0 ];then + end_test "FAIL: can't add macsec interface, skipping test" return 1 fi - run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac - run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac - run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac - run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac - msname=kci_macsec1 - run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012 - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" - run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \ - key 00 0123456789abcdef0123456789abcdef - run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef" - # clean up any leftovers - for msdev in kci_macsec{1,2,3,4} ; do - ip link del $msdev 2> /dev/null - done - echo 0 > /sys/bus/netdevsim/del_device - $probed && rmmod netdevsim + run_cmd ip link set dev "$msname" up + ip link add link "$msname" name "$vlanname" type vlan id 1 + ip link set dev "$vlanname" address 00:11:22:33:44:88 + ip link set dev "$vlanname" up + run_cmd ip link del dev "$vlanname" + run_cmd ip link del dev "$msname" - if [ $ret -ne 0 ]; then - end_test "FAIL: macsec_offload" + if [ $ret -ne 0 ];then + end_test "FAIL: macsec_vlan" return 1 fi - end_test "PASS: macsec_offload" + + end_test "PASS: macsec_vlan" } #------------------------------------------------------------------- @@ -738,6 +747,11 @@ kci_test_ipsec_offload() sysfsf=$sysfsd/ipsec sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/ probed=false + esp4_offload_probed_default=false + + if lsmod | grep -q esp4_offload; then + esp4_offload_probed_default=true + fi if ! 
mount | grep -q debugfs; then mount -t debugfs none /sys/kernel/debug/ &> /dev/null @@ -809,10 +823,10 @@ kci_test_ipsec_offload() # does driver have correct offload info run_cmd diff $sysfsf - << EOF SA count=2 tx=3 -sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000 +sa[0] tx ipaddr=$dstip sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[0] key=0x34333231 38373635 32313039 36353433 -sa[1] rx ipaddr=0x00000000 00000000 00000000 037ba8c0 +sa[1] rx ipaddr=$srcip sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1 sa[1] key=0x34333231 38373635 32313039 36353433 EOF @@ -831,6 +845,7 @@ EOF fi # clean up any leftovers + ! "$esp4_offload_probed_default" && lsmod | grep -q esp4_offload && rmmod esp4_offload echo 0 > /sys/bus/netdevsim/del_device $probed && rmmod netdevsim @@ -1065,6 +1080,45 @@ kci_test_fdb_get() end_test "PASS: bridge fdb get" } +kci_test_fdb_del() +{ + local test_mac=de:ad:be:ef:13:37 + local dummydev="dummy1" + local brdev="test-br0" + local ret=0 + + run_cmd_grep 'bridge fdb get' bridge fdb help + if [ $? -ne 0 ]; then + end_test "SKIP: fdb del tests: iproute2 too old" + return $ksft_skip + fi + + setup_ns testns + if [ $? 
-ne 0 ]; then + end_test "SKIP fdb del tests: cannot add net namespace $testns" + return $ksft_skip + fi + IP="ip -netns $testns" + BRIDGE="bridge -netns $testns" + run_cmd $IP link add $dummydev type dummy + run_cmd $IP link add name $brdev type bridge vlan_filtering 1 + run_cmd $IP link set dev $dummydev master $brdev + run_cmd $BRIDGE fdb add $test_mac dev $dummydev master static vlan 1 + run_cmd $BRIDGE vlan del vid 1 dev $dummydev + run_cmd $BRIDGE fdb get $test_mac br $brdev vlan 1 + run_cmd $BRIDGE fdb del $test_mac dev $dummydev master vlan 1 + run_cmd_fail $BRIDGE fdb get $test_mac br $brdev vlan 1 + + ip netns del $testns &>/dev/null + + if [ $ret -ne 0 ]; then + end_test "FAIL: bridge fdb del" + return 1 + fi + + end_test "PASS: bridge fdb del" +} + kci_test_neigh_get() { dstmac=de:ad:be:ef:13:37 @@ -1174,6 +1228,12 @@ do_test_address_proto() local ret=0 local err + run_cmd_grep 'proto' ip address help + if [ $? -ne 0 ];then + end_test "SKIP: addr proto ${what}: iproute2 too old" + return $ksft_skip + fi + ip address add dev "$devdummy" "$addr3" check_err $? proto=$(address_get_proto "$addr3") @@ -1267,6 +1327,132 @@ kci_test_enslave_bonding() ip netns del "$testns" } +# Called to validate the addresses on $IFNAME: +# +# 1. Every `temporary` address must have a matching `mngtmpaddr` +# 2. 
Every `mngtmpaddr` address must have some un`deprecated` `temporary` +# +# If the mngtmpaddr or tempaddr checking failed, return 0 and stop slowwait +validate_mngtmpaddr() +{ + local dev=$1 + local prefix="" + local addr_list=$(ip -j -n $testns addr show dev ${dev}) + local temp_addrs=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true) | .local') + local mng_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.mngtmpaddr == true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + local undep_prefixes=$(echo ${addr_list} | \ + jq -r '.[].addr_info[] | select(.temporary == true and .deprecated != true) | .local' | \ + cut -d: -f1-4 | tr '\n' ' ') + + # 1. All temporary addresses (temp and dep) must have a matching mngtmpaddr + for address in ${temp_addrs}; do + prefix=$(echo ${address} | cut -d: -f1-4) + if [[ ! " ${mng_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: Temporary $address with no matching mngtmpaddr!"; + return 0 + fi + done + + # 2. All mngtmpaddr addresses must have a temporary address (not dep) + for prefix in ${mng_prefixes}; do + if [[ ! " ${undep_prefixes} " =~ " $prefix " ]]; then + check_err 1 "FAIL: No undeprecated temporary in $prefix!"; + return 0 + fi + done + + return 1 +} + +kci_test_mngtmpaddr() +{ + local ret=0 + + setup_ns testns + if [ $? -ne 0 ]; then + end_test "SKIP mngtmpaddr tests: cannot add net namespace $testns" + return $ksft_skip + fi + + # 1. Create a dummy Ethernet interface + run_cmd ip -n $testns link add ${devdummy} type dummy + run_cmd ip -n $testns link set ${devdummy} up + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.use_tempaddr=1 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_prefered_lft=10 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.temp_valid_lft=25 + run_cmd ip netns exec $testns sysctl -w net.ipv6.conf.${devdummy}.max_desync_factor=1 + + # 2. Create several mngtmpaddr addresses on that interface. 
+ # with temp_*_lft configured to be pretty short (10 and 25 seconds + # for prefer/valid respectively) + for i in $(seq 1 9); do + run_cmd ip -n $testns addr add 2001:db8:7e57:${i}::1/64 mngtmpaddr dev ${devdummy} + done + + # 3. Confirm that a preferred temporary address exists for each mngtmpaddr + # address at all times, polling once per second for 30 seconds. + slowwait 30 validate_mngtmpaddr ${devdummy} + + # 4. Delete each mngtmpaddr address, one at a time (alternating between + # deleting and merely un-mngtmpaddr-ing), and confirm that the other + # mngtmpaddr addresses still have preferred temporaries. + for i in $(seq 1 9); do + (( $i % 4 == 0 )) && mng_flag="mngtmpaddr" || mng_flag="" + if (( $i % 2 == 0 )); then + run_cmd ip -n $testns addr del 2001:db8:7e57:${i}::1/64 $mng_flag dev ${devdummy} + else + run_cmd ip -n $testns addr change 2001:db8:7e57:${i}::1/64 dev ${devdummy} + fi + # the temp addr should be deleted + validate_mngtmpaddr ${devdummy} + done + + if [ $ret -ne 0 ]; then + end_test "FAIL: mngtmpaddr add/remove incorrect" + else + end_test "PASS: mngtmpaddr add/remove correctly" + fi + + ip netns del "$testns" + return $ret +} + +kci_test_operstate() +{ + local ret=0 + + # Check that it is possible to set operational state during device + # creation and that it is preserved when the administrative state of + # the device is toggled. + run_cmd ip link add name vx0 up state up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UP" ip link show dev vx0 + run_cmd ip link set dev vx0 down + run_cmd_grep "state DOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + # Check that it is possible to set the operational state of the device + # after creation. 
+ run_cmd ip link add name vx0 up type vxlan id 10010 dstport 4789 + run_cmd_grep "state UNKNOWN" ip link show dev vx0 + run_cmd ip link set dev vx0 state up + run_cmd_grep "state UP" ip link show dev vx0 + + run_cmd ip link del dev vx0 + + if [ "$ret" -ne 0 ]; then + end_test "FAIL: operstate" + return 1 + fi + + end_test "PASS: operstate" +} + kci_test_rtnl() { local current_test @@ -1300,6 +1486,8 @@ usage: ${0##*/} OPTS EOF } +require_command jq + #check for needed privileges if [ "$(id -u)" -ne 0 ];then end_test "SKIP: Need root privileges" diff --git a/tools/testing/selftests/net/rtnetlink_notification.sh b/tools/testing/selftests/net/rtnetlink_notification.sh new file mode 100755 index 000000000000..3f9780232bd6 --- /dev/null +++ b/tools/testing/selftests/net/rtnetlink_notification.sh @@ -0,0 +1,112 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test is for checking rtnetlink notification callpaths, and get as much +# coverage as possible. +# +# set -e + +ALL_TESTS=" + kci_test_mcast_addr_notification + kci_test_anycast_addr_notification +" + +source lib.sh +test_dev="test-dummy1" + +kci_test_mcast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor maddr > $tmpfile & + monitor_pid=$! + defer kill_process "$monitor_pid" + + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "mcast addr notification: iproute2 too old" + return $RET + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 4 line matches as follows. 
+ # 13: test-dummy1 inet6 mcast ff02::1 scope global + # 13: test-dummy1 inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1 inet mcast 224.0.0.1 scope global + # Deleted 13: test-dummy1 inet6 mcast ff02::1 scope global + match_result=$(grep -cE "$test_dev.*(224.0.0.1|ff02::1)" "$tmpfile") + if [ "$match_result" -ne 4 ]; then + RET=$ksft_fail + fi + log_test "mcast addr notification: Expected 4 matches, got $match_result" + return $RET +} + +kci_test_anycast_addr_notification() +{ + RET=0 + local tmpfile + local monitor_pid + local match_result + + tmpfile=$(mktemp) + defer rm "$tmpfile" + + ip monitor acaddress > "$tmpfile" & + monitor_pid=$! + defer kill_process "$monitor_pid" + sleep 1 + + if [ ! -e "/proc/$monitor_pid" ]; then + RET=$ksft_skip + log_test "anycast addr notification: iproute2 too old" + return "$RET" + fi + + ip link add name "$test_dev" type dummy + check_err $? "failed to add dummy interface" + ip link set "$test_dev" up + check_err $? "failed to set dummy interface up" + sysctl -qw net.ipv6.conf."$test_dev".forwarding=1 + ip link del dev "$test_dev" + check_err $? "Failed to delete dummy interface" + sleep 1 + + # There should be 2 line matches as follows. 
+ # 9: dummy2 inet6 any fe80:: scope global + # Deleted 9: dummy2 inet6 any fe80:: scope global + match_result=$(grep -cE "$test_dev.*(fe80::)" "$tmpfile") + if [ "$match_result" -ne 2 ]; then + RET=$ksft_fail + fi + log_test "anycast addr notification: Expected 2 matches, got $match_result" + return "$RET" +} + +#check for needed privileges +if [ "$(id -u)" -ne 0 ];then + RET=$ksft_skip + log_test "need root privileges" + exit $RET +fi + +require_command ip + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 9eb42570294d..b81ed0352d6c 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -18,7 +18,7 @@ #include <linux/net_tstamp.h> #include <linux/errqueue.h> -#include "../kselftest.h" +#include "kselftest.h" struct options { int so_timestamp; @@ -57,6 +57,8 @@ static struct sof_flag sof_flags[] = { SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), + SOF_FLAG(SOF_TIMESTAMPING_OPT_RX_FILTER), + SOF_FLAG(SOF_TIMESTAMPING_RAW_HARDWARE), }; static struct socket_type socket_types[] = { @@ -98,6 +100,22 @@ static struct test_case test_cases[] = { {} }, { + { .so_timestamping = SOF_TIMESTAMPING_RAW_HARDWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + {} + }, + { + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_OPT_RX_FILTER }, + { .swtstamp = true } + }, + { { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, { .swtstamp = true } diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c index f02f1f95d227..a04dac0b8027 100644 --- a/tools/testing/selftests/net/sctp_hello.c +++ b/tools/testing/selftests/net/sctp_hello.c @@ -29,7 +29,6 @@ static void 
set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len static int do_client(int argc, char *argv[]) { struct sockaddr_storage ss; - char buf[] = "hello"; int csk, ret, len; if (argc < 5) { @@ -56,16 +55,10 @@ static int do_client(int argc, char *argv[]) set_addr(&ss, argv[3], argv[4], &len); ret = connect(csk, (struct sockaddr *)&ss, len); - if (ret < 0) { - printf("failed to connect to peer\n"); + if (ret < 0) return -1; - } - ret = send(csk, buf, strlen(buf) + 1, 0); - if (ret < 0) { - printf("failed to send msg %d\n", ret); - return -1; - } + recv(csk, NULL, 0, 0); close(csk); return 0; @@ -75,7 +68,6 @@ int main(int argc, char *argv[]) { struct sockaddr_storage ss; int lsk, csk, ret, len; - char buf[20]; if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) { printf("%s server|client ...\n", argv[0]); @@ -125,11 +117,6 @@ int main(int argc, char *argv[]) return -1; } - ret = recv(csk, buf, sizeof(buf), 0); - if (ret <= 0) { - printf("failed to recv msg %d\n", ret); - return -1; - } close(csk); close(lsk); diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh index c854034b6aa1..667b211aa8a1 100755 --- a/tools/testing/selftests/net/sctp_vrf.sh +++ b/tools/testing/selftests/net/sctp_vrf.sh @@ -20,9 +20,9 @@ setup() { modprobe sctp_diag setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS - ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null - ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null + ip net exec $CLIENT_NS1 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $CLIENT_NS2 sysctl -wq net.ipv6.conf.default.accept_dad=0 + ip net exec $SERVER_NS sysctl -wq net.ipv6.conf.default.accept_dad=0 ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1 ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns 
$CLIENT_NS2 @@ -62,17 +62,40 @@ setup() { } cleanup() { - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS } -wait_server() { +start_server() { local IFACE=$1 local CNT=0 - until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \ - grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do - [ $((CNT++)) = "20" ] && { RET=3; return $RET; } + ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP $SERVER_PORT $IFACE & + disown + until ip netns exec $SERVER_NS ss -SlH | grep -q "$IFACE"; do + [ $((CNT++)) -eq 30 ] && { RET=3; return $RET; } + sleep 0.1 + done +} + +stop_server() { + local CNT=0 + + ip netns exec $SERVER_NS pkill sctp_hello + while ip netns exec $SERVER_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break + sleep 0.1 + done +} + +wait_client() { + local CLIENT_NS=$1 + local CNT=0 + + while ip netns exec $CLIENT_NS ss -SaH | grep -q .; do + [ $((CNT++)) -eq 30 ] && break sleep 0.1 done } @@ -81,14 +104,12 @@ do_test() { local CLIENT_NS=$1 local IFACE=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE 2>&1 >/dev/null & - disown - wait_server $IFACE || return $RET + start_server $IFACE || return $RET timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? 
+ wait_client $CLIENT_NS + stop_server return $RET } @@ -96,25 +117,21 @@ do_testx() { local IFACE1=$1 local IFACE2=$2 - ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE1 2>&1 >/dev/null & - disown - wait_server $IFACE1 || return $RET - ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \ - $SERVER_PORT $IFACE2 2>&1 >/dev/null & - disown - wait_server $IFACE2 || return $RET + start_server $IFACE1 || return $RET + start_server $IFACE2 || return $RET timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \ + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT && \ timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \ - $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null + $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT RET=$? + wait_client $CLIENT_NS1 + wait_client $CLIENT_NS2 + stop_server return $RET } testup() { - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=1 echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y " do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -123,7 +140,7 @@ testup() { do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; } echo "[PASS]" - ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null + ip netns exec $SERVER_NS sysctl -wq net.sctp.l3mdev_accept=0 echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N " do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; } echo "[PASS]" @@ -160,7 +177,7 @@ testup() { do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; } echo "[PASS]" - echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N " + echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, Y " do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; } echo 
"[PASS]" } diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh deleted file mode 100644 index 2070b57849de..000000000000 --- a/tools/testing/selftests/net/setup_loopback.sh +++ /dev/null @@ -1,120 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout" -readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs" -readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})" -readonly HARD_IRQS="$(< ${IRQ_PATH})" -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -netdev_check_for_carrier() { - local -r dev="$1" - - for i in {1..5}; do - carrier="$(cat /sys/class/net/${dev}/carrier)" - if [[ "${carrier}" -ne 1 ]] ; then - echo "carrier not ready yet..." >&2 - sleep 1 - else - echo "carrier ready" >&2 - break - fi - done - echo "${carrier}" -} - -# Assumes that there is no existing ipvlan device on the physical device -setup_loopback_environment() { - local dev="$1" - - # Fail hard if cannot turn on loopback mode for current NIC - ethtool -K "${dev}" loopback on || exit 1 - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_macvlan_ns(){ - local -r link_dev="$1" - local -r ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - local -r addr="$5" - - ip link add link "${link_dev}" dev "${ns_dev}" \ - address "${ns_mac}" type macvlan - exit_code=$? 
- if [[ "${exit_code}" -ne 0 ]]; then - echo "setup_macvlan_ns failed" - exit $exit_code - fi - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - ip link set dev "${ns_dev}" netns "${ns_name}" - ip -netns "${ns_name}" link set dev "${ns_dev}" up - if [[ -n "${addr}" ]]; then - ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}" - fi - - sleep 1 -} - -cleanup_macvlan_ns(){ - while (( $# >= 2 )); do - ns_name="$1" - ns_dev="$2" - ip -netns "${ns_name}" link del dev "${ns_dev}" - ip netns del "${ns_name}" - shift 2 - done -} - -cleanup_loopback(){ - local -r dev="$1" - - ethtool -K "${dev}" loopback off - sleep 1 - - # Check for the carrier - carrier=$(netdev_check_for_carrier ${dev}) - if [[ "${carrier}" -ne 1 ]] ; then - echo "setup_loopback_environment failed" - exit 1 - fi -} - -setup_interrupt() { - # Use timer on host to trigger the network stack - # Also disable device interrupt to not depend on NIC interrupt - # Reduce test flakiness caused by unexpected interrupts - echo 100000 >"${FLUSH_PATH}" - echo 50 >"${IRQ_PATH}" -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - cleanup_macvlan_ns ${server_ns} server ${client_ns} client -} - -setup() { - setup_loopback_environment "${dev}" - setup_interrupt -} - -cleanup() { - cleanup_loopback "${dev}" - - echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}" - echo "${HARD_IRQS}" >"${IRQ_PATH}" -} diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh deleted file mode 100644 index 1f78a87f6f37..000000000000 --- a/tools/testing/selftests/net/setup_veth.sh +++ /dev/null @@ -1,44 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 - -readonly server_ns=$(mktemp -u server-XXXXXXXX) -readonly client_ns=$(mktemp -u client-XXXXXXXX) - -setup_veth_ns() { - local -r link_dev="$1" - local -r 
ns_name="$2" - local -r ns_dev="$3" - local -r ns_mac="$4" - - [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}" - echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout" - ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535 - ip -netns "${ns_name}" link set dev "${ns_dev}" up - - ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off -} - -setup_ns() { - # Set up server_ns namespace and client_ns namespace - ip link add name server type veth peer name client - - setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}" - setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}" -} - -cleanup_ns() { - local ns_name - - for ns_name in ${client_ns} ${server_ns}; do - [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}" - done -} - -setup() { - # no global init setup step needed - : -} - -cleanup() { - cleanup_ns -} diff --git a/tools/testing/selftests/net/sk_so_peek_off.c b/tools/testing/selftests/net/sk_so_peek_off.c new file mode 100644 index 000000000000..2a3f5c604f52 --- /dev/null +++ b/tools/testing/selftests/net/sk_so_peek_off.c @@ -0,0 +1,202 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <sys/types.h> +#include <netinet/in.h> +#include <arpa/inet.h> +#include "kselftest.h" + +static char *afstr(int af, int proto) +{ + if (proto == IPPROTO_TCP) + return af == AF_INET ? "TCP/IPv4" : "TCP/IPv6"; + else + return af == AF_INET ? "UDP/IPv4" : "UDP/IPv6"; +} + +int sk_peek_offset_probe(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? 
SOCK_STREAM : SOCK_DGRAM); + int optv = 0; + int ret = 0; + int s; + + s = socket(af, type, proto); + if (s < 0) { + ksft_perror("Temporary TCP socket creation failed"); + } else { + if (!setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &optv, sizeof(int))) + ret = 1; + else + printf("%s does not support SO_PEEK_OFF\n", afstr(af, proto)); + close(s); + } + return ret; +} + +static void sk_peek_offset_set(int s, int offset) +{ + if (setsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, sizeof(offset))) + ksft_perror("Failed to set SO_PEEK_OFF value\n"); +} + +static int sk_peek_offset_get(int s) +{ + int offset; + socklen_t len = sizeof(offset); + + if (getsockopt(s, SOL_SOCKET, SO_PEEK_OFF, &offset, &len)) + ksft_perror("Failed to get SO_PEEK_OFF value\n"); + return offset; +} + +static int sk_peek_offset_test(sa_family_t af, int proto) +{ + int type = (proto == IPPROTO_TCP ? SOCK_STREAM : SOCK_DGRAM); + union { + struct sockaddr sa; + struct sockaddr_in a4; + struct sockaddr_in6 a6; + } a; + int res = 0; + int s[2] = {0, 0}; + int recv_sock = 0; + int offset = 0; + ssize_t len; + char buf[2]; + + memset(&a, 0, sizeof(a)); + a.sa.sa_family = af; + + s[0] = recv_sock = socket(af, type, proto); + s[1] = socket(af, type, proto); + + if (s[0] < 0 || s[1] < 0) { + ksft_perror("Temporary socket creation failed\n"); + goto out; + } + if (bind(s[0], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket bind() failed\n"); + goto out; + } + if (getsockname(s[0], &a.sa, &((socklen_t) { sizeof(a) })) < 0) { + ksft_perror("Temporary socket getsockname() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP && listen(s[0], 0) < 0) { + ksft_perror("Temporary socket listen() failed\n"); + goto out; + } + if (connect(s[1], &a.sa, sizeof(a)) < 0) { + ksft_perror("Temporary socket connect() failed\n"); + goto out; + } + if (proto == IPPROTO_TCP) { + recv_sock = accept(s[0], NULL, NULL); + if (recv_sock <= 0) { + ksft_perror("Temporary socket accept() failed\n"); + goto out; + } + } + + /* Some 
basic tests of getting/setting offset */ + offset = sk_peek_offset_get(recv_sock); + if (offset != -1) { + ksft_perror("Initial value of socket offset not -1\n"); + goto out; + } + sk_peek_offset_set(recv_sock, 0); + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Failed to set socket offset to 0\n"); + goto out; + } + + /* Transfer a message */ + if (send(s[1], (char *)("ab"), 2, 0) != 2) { + ksft_perror("Temporary probe socket send() failed\n"); + goto out; + } + /* Read first byte */ + len = recv(recv_sock, buf, 1, MSG_PEEK); + if (len != 1 || buf[0] != 'a') { + ksft_perror("Failed to read first byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 1) { + ksft_perror("Offset not forwarded correctly at first byte\n"); + goto out; + } + /* Try to read beyond last byte */ + len = recv(recv_sock, buf, 2, MSG_PEEK); + if (len != 1 || buf[0] != 'b') { + ksft_perror("Failed to read last byte of message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 2) { + ksft_perror("Offset not forwarded correctly at last byte\n"); + goto out; + } + /* Flush message */ + len = recv(recv_sock, buf, 2, MSG_TRUNC); + if (len != 2) { + ksft_perror("Failed to flush message\n"); + goto out; + } + offset = sk_peek_offset_get(recv_sock); + if (offset != 0) { + ksft_perror("Offset not reverted correctly after flush\n"); + goto out; + } + + printf("%s with MSG_PEEK_OFF works correctly\n", afstr(af, proto)); + res = 1; +out: + if (proto == IPPROTO_TCP && recv_sock >= 0) + close(recv_sock); + if (s[1] >= 0) + close(s[1]); + if (s[0] >= 0) + close(s[0]); + return res; +} + +static int do_test(int proto) +{ + int res4, res6; + + res4 = sk_peek_offset_probe(AF_INET, proto); + res6 = sk_peek_offset_probe(AF_INET6, proto); + + if (!res4 && !res6) + return KSFT_SKIP; + + if (res4) + res4 = sk_peek_offset_test(AF_INET, proto); + + if (res6) + res6 = sk_peek_offset_test(AF_INET6, proto); + + if (!res4 || 
!res6) + return KSFT_FAIL; + + return KSFT_PASS; +} + +int main(void) +{ + int restcp, resudp; + + restcp = do_test(IPPROTO_TCP); + resudp = do_test(IPPROTO_UDP); + if (restcp == KSFT_FAIL || resudp == KSFT_FAIL) + return KSFT_FAIL; + + return KSFT_PASS; +} diff --git a/tools/testing/selftests/net/skf_net_off.c b/tools/testing/selftests/net/skf_net_off.c new file mode 100644 index 000000000000..1fdf61d6cd7f --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Open a tun device. + * + * [modifications: use IFF_NAPI_FRAGS, add sk filter] + * + * Expects the device to have been configured previously, e.g.: + * sudo ip tuntap add name tap1 mode tap + * sudo ip link set tap1 up + * sudo ip link set dev tap1 addr 02:00:00:00:00:01 + * sudo ip -6 addr add fdab::1 peer fdab::2 dev tap1 nodad + * + * And to avoid premature pskb_may_pull: + * + * sudo ethtool -K tap1 gro off + * sudo bash -c 'echo 0 > /proc/sys/net/ipv4/ip_early_demux' + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <errno.h> +#include <error.h> +#include <fcntl.h> +#include <getopt.h> +#include <linux/filter.h> +#include <linux/if.h> +#include <linux/if_packet.h> +#include <linux/if_tun.h> +#include <linux/ipv6.h> +#include <netinet/if_ether.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/ip6.h> +#include <netinet/udp.h> +#include <poll.h> +#include <signal.h> +#include <stdbool.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/ioctl.h> +#include <sys/socket.h> +#include <sys/poll.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <unistd.h> + +static bool cfg_do_filter; +static bool cfg_do_frags; +static int cfg_dst_port = 8000; +static char *cfg_ifname; + +static int tun_open(const char *tun_name) +{ + struct ifreq ifr = {0}; + int fd, ret; + + fd = open("/dev/net/tun", O_RDWR); + if (fd == -1) + error(1, errno, "open /dev/net/tun"); + + 
ifr.ifr_flags = IFF_TAP; + if (cfg_do_frags) + ifr.ifr_flags |= IFF_NAPI | IFF_NAPI_FRAGS; + + strncpy(ifr.ifr_name, tun_name, IFNAMSIZ - 1); + + ret = ioctl(fd, TUNSETIFF, &ifr); + if (ret) + error(1, ret, "ioctl TUNSETIFF"); + + return fd; +} + +static void sk_set_filter(int fd) +{ + const int offset_proto = offsetof(struct ip6_hdr, ip6_nxt); + const int offset_dport = sizeof(struct ip6_hdr) + offsetof(struct udphdr, dest); + + /* Filter UDP packets with destination port cfg_dst_port */ + struct sock_filter filter_code[] = { + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), + BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_NET_OFF + offset_proto), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_UDP, 0, 2), + BPF_STMT(BPF_LD + BPF_H + BPF_ABS, SKF_NET_OFF + offset_dport), + BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dst_port, 1, 0), + BPF_STMT(BPF_RET + BPF_K, 0), + BPF_STMT(BPF_RET + BPF_K, 0xFFFF), + }; + + struct sock_fprog filter = { + sizeof(filter_code) / sizeof(filter_code[0]), + filter_code, + }; + + if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &filter, sizeof(filter))) + error(1, errno, "setsockopt attach filter"); +} + +static int raw_open(void) +{ + int fd; + + fd = socket(PF_INET6, SOCK_RAW, IPPROTO_UDP); + if (fd == -1) + error(1, errno, "socket raw (udp)"); + + if (cfg_do_filter) + sk_set_filter(fd); + + return fd; +} + +static void tun_write(int fd) +{ + const char eth_src[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x02 }; + const char eth_dst[] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 }; + struct tun_pi pi = {0}; + struct ipv6hdr ip6h = {0}; + struct udphdr uh = {0}; + struct ethhdr eth = {0}; + uint32_t payload; + struct iovec iov[5]; + int ret; + + pi.proto = htons(ETH_P_IPV6); + + memcpy(eth.h_source, eth_src, sizeof(eth_src)); + memcpy(eth.h_dest, eth_dst, sizeof(eth_dst)); + eth.h_proto = htons(ETH_P_IPV6); + + ip6h.version = 6; + ip6h.payload_len = htons(sizeof(uh) + sizeof(uint32_t)); + 
ip6h.nexthdr = IPPROTO_UDP; + ip6h.hop_limit = 8; + if (inet_pton(AF_INET6, "fdab::2", &ip6h.saddr) != 1) + error(1, errno, "inet_pton src"); + if (inet_pton(AF_INET6, "fdab::1", &ip6h.daddr) != 1) + error(1, errno, "inet_pton src"); + + uh.source = htons(8000); + uh.dest = htons(cfg_dst_port); + uh.len = ip6h.payload_len; + uh.check = 0; + + payload = htonl(0xABABABAB); /* Covered in IPv6 length */ + + iov[0].iov_base = &pi; + iov[0].iov_len = sizeof(pi); + iov[1].iov_base = &eth; + iov[1].iov_len = sizeof(eth); + iov[2].iov_base = &ip6h; + iov[2].iov_len = sizeof(ip6h); + iov[3].iov_base = &uh; + iov[3].iov_len = sizeof(uh); + iov[4].iov_base = &payload; + iov[4].iov_len = sizeof(payload); + + ret = writev(fd, iov, sizeof(iov) / sizeof(iov[0])); + if (ret <= 0) + error(1, errno, "writev"); +} + +static void raw_read(int fd) +{ + struct timeval tv = { .tv_usec = 100 * 1000 }; + struct msghdr msg = {0}; + struct iovec iov[2]; + struct udphdr uh; + uint32_t payload[2]; + int ret; + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) + error(1, errno, "setsockopt rcvtimeo udp"); + + iov[0].iov_base = &uh; + iov[0].iov_len = sizeof(uh); + + iov[1].iov_base = payload; + iov[1].iov_len = sizeof(payload); + + msg.msg_iov = iov; + msg.msg_iovlen = sizeof(iov) / sizeof(iov[0]); + + ret = recvmsg(fd, &msg, 0); + if (ret <= 0) + error(1, errno, "read raw"); + if (ret != sizeof(uh) + sizeof(payload[0])) + error(1, errno, "read raw: len=%d\n", ret); + + fprintf(stderr, "raw recv: 0x%x\n", payload[0]); +} + +static void parse_opts(int argc, char **argv) +{ + int c; + + while ((c = getopt(argc, argv, "fFi:")) != -1) { + switch (c) { + case 'f': + cfg_do_filter = true; + printf("bpf filter enabled\n"); + break; + case 'F': + cfg_do_frags = true; + printf("napi frags mode enabled\n"); + break; + case 'i': + cfg_ifname = optarg; + break; + default: + error(1, 0, "unknown option %c", optopt); + break; + } + } + + if (!cfg_ifname) + error(1, 0, "must specify tap interface name 
(-i)"); +} + +int main(int argc, char **argv) +{ + int fdt, fdr; + + parse_opts(argc, argv); + + fdr = raw_open(); + fdt = tun_open(cfg_ifname); + + tun_write(fdt); + raw_read(fdr); + + if (close(fdt)) + error(1, errno, "close tun"); + if (close(fdr)) + error(1, errno, "close udp"); + + fprintf(stderr, "OK\n"); + return 0; +} + diff --git a/tools/testing/selftests/net/skf_net_off.sh b/tools/testing/selftests/net/skf_net_off.sh new file mode 100755 index 000000000000..5da5066fb465 --- /dev/null +++ b/tools/testing/selftests/net/skf_net_off.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +readonly NS="ns-$(mktemp -u XXXXXX)" + +cleanup() { + ip netns del $NS +} + +ip netns add $NS +trap cleanup EXIT + +ip -netns $NS link set lo up +ip -netns $NS tuntap add name tap1 mode tap +ip -netns $NS link set tap1 up +ip -netns $NS link set dev tap1 addr 02:00:00:00:00:01 +ip -netns $NS -6 addr add fdab::1 peer fdab::2 dev tap1 nodad +ip netns exec $NS ethtool -K tap1 gro off + +# disable early demux, else udp_v6_early_demux pulls udp header into linear +ip netns exec $NS sysctl -w net.ipv4.ip_early_demux=0 + +echo "no filter" +ip netns exec $NS ./skf_net_off -i tap1 + +echo "filter, linear skb (-f)" +ip netns exec $NS ./skf_net_off -i tap1 -f + +echo "filter, fragmented skb (-f) (-F)" +ip netns exec $NS ./skf_net_off -i tap1 -f -F diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c index e9fa14e10732..4740701f1a9a 100644 --- a/tools/testing/selftests/net/so_incoming_cpu.c +++ b/tools/testing/selftests/net/so_incoming_cpu.c @@ -9,7 +9,7 @@ #include <sys/socket.h> #include <sys/sysinfo.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" FIXTURE(so_incoming_cpu) { diff --git a/tools/testing/selftests/net/so_rcv_listener.c b/tools/testing/selftests/net/so_rcv_listener.c new file mode 100644 index 000000000000..bc5841192aa6 --- /dev/null +++ 
b/tools/testing/selftests/net/so_rcv_listener.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <netdb.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <linux/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <arpa/inet.h> + +#ifndef SO_RCVPRIORITY +#define SO_RCVPRIORITY 82 +#endif + +struct options { + __u32 val; + int name; + int rcvname; + const char *host; + const char *service; +} opt; + +static void __attribute__((noreturn)) usage(const char *bin) +{ + printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin); + printf("Options:\n" + "\t\t-M val Test SO_RCVMARK\n" + "\t\t-P val Test SO_RCVPRIORITY\n" + ""); + exit(EXIT_FAILURE); +} + +static void parse_args(int argc, char *argv[]) +{ + int o; + + while ((o = getopt(argc, argv, "M:P:")) != -1) { + switch (o) { + case 'M': + opt.val = atoi(optarg); + opt.name = SO_MARK; + opt.rcvname = SO_RCVMARK; + break; + case 'P': + opt.val = atoi(optarg); + opt.name = SO_PRIORITY; + opt.rcvname = SO_RCVPRIORITY; + break; + default: + usage(argv[0]); + break; + } + } + + if (optind != argc - 2) + usage(argv[0]); + + opt.host = argv[optind]; + opt.service = argv[optind + 1]; +} + +int main(int argc, char *argv[]) +{ + int err = 0; + int recv_fd = -1; + int ret_value = 0; + __u32 recv_val; + struct cmsghdr *cmsg; + char cbuf[CMSG_SPACE(sizeof(__u32))]; + char recv_buf[CMSG_SPACE(sizeof(__u32))]; + struct iovec iov[1]; + struct msghdr msg; + struct sockaddr_in recv_addr4; + struct sockaddr_in6 recv_addr6; + + parse_args(argc, argv); + + int family = strchr(opt.host, ':') ? 
AF_INET6 : AF_INET; + + recv_fd = socket(family, SOCK_DGRAM, IPPROTO_UDP); + if (recv_fd < 0) { + perror("Can't open recv socket"); + ret_value = -errno; + goto cleanup; + } + + err = setsockopt(recv_fd, SOL_SOCKET, opt.rcvname, &opt.val, sizeof(opt.val)); + if (err < 0) { + perror("Recv setsockopt error"); + ret_value = -errno; + goto cleanup; + } + + if (family == AF_INET) { + memset(&recv_addr4, 0, sizeof(recv_addr4)); + recv_addr4.sin_family = family; + recv_addr4.sin_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr4.sin_addr) <= 0) { + perror("Invalid IPV4 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr4, sizeof(recv_addr4)); + } else { + memset(&recv_addr6, 0, sizeof(recv_addr6)); + recv_addr6.sin6_family = family; + recv_addr6.sin6_port = htons(atoi(opt.service)); + + if (inet_pton(family, opt.host, &recv_addr6.sin6_addr) <= 0) { + perror("Invalid IPV6 address"); + ret_value = -errno; + goto cleanup; + } + + err = bind(recv_fd, (struct sockaddr *)&recv_addr6, sizeof(recv_addr6)); + } + + if (err < 0) { + perror("Recv bind error"); + ret_value = -errno; + goto cleanup; + } + + iov[0].iov_base = recv_buf; + iov[0].iov_len = sizeof(recv_buf); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = iov; + msg.msg_iovlen = 1; + msg.msg_control = cbuf; + msg.msg_controllen = sizeof(cbuf); + + err = recvmsg(recv_fd, &msg, 0); + if (err < 0) { + perror("Message receive error"); + ret_value = -errno; + goto cleanup; + } + + for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL; cmsg = CMSG_NXTHDR(&msg, cmsg)) { + if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == opt.name) { + recv_val = *(__u32 *)CMSG_DATA(cmsg); + printf("Received value: %u\n", recv_val); + + if (recv_val != opt.val) { + fprintf(stderr, "Error: expected value: %u, got: %u\n", + opt.val, recv_val); + ret_value = -EINVAL; + } + goto cleanup; + } + } + + fprintf(stderr, "Error: No matching cmsg received\n"); + ret_value 
= -ENOMSG; + +cleanup: + if (recv_fd >= 0) + close(recv_fd); + + return ret_value; +} diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 8457b7ccbc09..b76df1efc2ef 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -174,7 +174,7 @@ static int do_recv_errqueue_timeout(int fdt) msg.msg_controllen = sizeof(control); while (1) { - const char *reason; + const char *reason = NULL; ret = recvmsg(fdt, &msg, MSG_ERRQUEUE); if (ret == -1 && errno == EAGAIN) diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index db1aeb8c5d1e..9e270548dad8 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -7,7 +7,7 @@ #include <sys/socket.h> #include <netinet/in.h> -#include "../kselftest.h" +#include "kselftest.h" struct socket_testcase { int domain; @@ -39,6 +39,7 @@ static int run_tests(void) { char err_string1[ERR_STRING_SZ]; char err_string2[ERR_STRING_SZ]; + const char *msg1, *msg2; int i, err; err = 0; @@ -56,13 +57,13 @@ static int run_tests(void) errno == -s->expect) continue; - strerror_r(-s->expect, err_string1, ERR_STRING_SZ); - strerror_r(errno, err_string2, ERR_STRING_SZ); + msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "err (%s) got (%s)\n", s->domain, s->type, s->protocol, - err_string1, err_string2); + msg1, msg2); err = -1; break; @@ -70,12 +71,12 @@ static int run_tests(void) close(fd); if (s->expect < 0) { - strerror_r(errno, err_string1, ERR_STRING_SZ); + msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "success got err (%s)\n", s->domain, s->type, s->protocol, - err_string1); + msg1); err = -1; break; diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh 
b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh index 02d617040793..a5e959a080bb 100755 --- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh @@ -285,11 +285,6 @@ setup_hs() ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh index 79fb81e63c59..a649dba3cb77 100755 --- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh @@ -250,11 +250,6 @@ setup_hs() eval local rtname=\${rt_${rid}} local rtveth=veth-t${tid} - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. 
- ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0 - ip -netns ${hsname} link add veth0 type veth peer name ${rtveth} ip -netns ${hsname} link set ${rtveth} netns ${rtname} ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0 diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh index 50563443a4ad..318487eda671 100755 --- a/tools/testing/selftests/net/srv6_end_flavors_test.sh +++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh @@ -399,7 +399,7 @@ __get_srv6_rtcfg_id() # Given the description of a router <id:op> as an input, the function returns # the <op> token which represents the operation (e.g. End behavior with or -# withouth flavors) configured for the node. +# without flavors) configured for the node. # Note that when the operation represents an End behavior with a list of # flavors, the output is the ordered version of that list. @@ -480,7 +480,7 @@ setup_rt_local_sids() # all SIDs start with a common locator. Routes and SRv6 Endpoint - # behavior instaces are grouped together in the 'localsid' table. + # behavior instances are grouped together in the 'localsid' table. ip -netns "${nsname}" -6 rule \ add to "${LOCATOR_SERVICE}::/16" \ lookup "${LOCALSID_TABLE_ID}" prio 999 diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh index 87e414cc417c..4bc135e5c22c 100755 --- a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh @@ -245,10 +245,8 @@ # that adopted in the use cases already examined (of course, it is necessary to # consider the different SIDs/C-SIDs). -# Kselftest framework requirement - SKIP code is 4. 
-readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -376,32 +374,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -410,8 +394,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -420,28 +403,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. 
In # case of an error during the setup phase of the testing environment, @@ -462,10 +429,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -497,7 +464,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -518,9 +485,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -596,7 +560,7 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -630,7 +594,7 @@ setup_rt_local_sids() dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. 
ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -668,8 +632,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -744,8 +708,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -791,11 +755,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -880,7 +839,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -903,7 +862,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -915,7 +874,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1025,7 +984,7 @@ rt_x_nextcsid_end_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval 
nsname=\${$(get_rtname "${rt}")} __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh index c79cb8ede17f..34b781a2ae74 100755 --- a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh @@ -72,6 +72,9 @@ # Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in # the selftest network. # +# In addition, every router interface connecting rt-x to rt-y is assigned an +# IPv6 link-local address fe80::x:y/64. +# # Local SID/C-SID table # ===================== # @@ -287,10 +290,8 @@ # packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46) # and sends it to the host hs-1. -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" @@ -418,32 +419,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -452,15 +439,12 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" + setup_ns "${nsname}" - __create_namespace "${nsname}" - + eval nsname=\${$(get_rtname "${rtid}")} ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip 
netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -470,29 +454,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". @@ -512,10 +479,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -547,7 +514,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -557,6 +524,9 @@ setup_rt_networking() ip -netns "${nsname}" addr \ add "${net_prefix}::${rt}/64" dev "${devname}" nodad + ip -netns "${nsname}" addr \ + add "fe80::${rt}:${neigh}/64" dev "${devname}" nodad + ip -netns "${nsname}" link set "${devname}" up done @@ -631,7 +601,7 @@ set_end_x_nextcsid() local rt="$1" local adj="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} net_prefix="$(get_network_prefix "${rt}" "${adj}")" lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" @@ -645,12 +615,33 @@ set_end_x_nextcsid() nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" } +set_end_x_ll_nextcsid() +{ + local rt="$1" + local adj="$2" + + eval nsname=\${$(get_rtname "${rt}")} + 
lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")" + nh6_ll_addr="fe80::${adj}:${rt}" + oifname="veth-rt-${rt}-${adj}" + + # enabled NEXT-C-SID SRv6 End.X behavior via an IPv6 link-local nexthop + # address (note that "dev" is the dummy dum0 device chosen for the sake + # of simplicity). + ip -netns "${nsname}" -6 route \ + replace "${lcnode_func_prefix}" \ + table "${LOCALSID_TABLE_ID}" \ + encap seg6local action End.X nh6 "${nh6_ll_addr}" \ + oif "${oifname}" flavors next-csid lblen "${LCBLOCK_BLEN}" \ + nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}" +} + set_underlay_sids_reachability() { local rt="$1" local rt_neighs="$2" - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -685,12 +676,12 @@ setup_rt_local_sids() local lcnode_func_prefix local lcblock_prefix - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} set_underlay_sids_reachability "${rt}" "${rt_neighs}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. 
ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -728,8 +719,8 @@ __setup_l3vpn() local rtsrc_nsname local rtdst_nsname - rtsrc_nsname="$(get_rtname "${src}")" - rtdst_nsname="$(get_rtname "${dst}")" + eval rtsrc_nsname=\${$(get_rtname "${src}")} + eval rtdst_nsname=\${$(get_rtname "${dst}")} container="${LCBLOCK_ADDR}" @@ -804,8 +795,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -851,11 +842,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -947,7 +933,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -970,7 +956,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -982,7 +968,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 @@ -1057,6 +1043,27 @@ host_vpn_tests() check_and_log_hs_ipv4_connectivity 1 2 check_and_log_hs_ipv4_connectivity 2 1 + + 
# Setup the adjacencies in the SRv6 aware routers using IPv6 link-local + # addresses. + # - rt-3 SRv6 End.X adjacency with rt-4 + # - rt-4 SRv6 End.X adjacency with rt-1 + set_end_x_ll_nextcsid 3 4 + set_end_x_ll_nextcsid 4 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6), link-local" + + check_and_log_hs_ipv6_connectivity 1 2 + check_and_log_hs_ipv6_connectivity 2 1 + + log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4), link-local" + + check_and_log_hs_ipv4_connectivity 1 2 + check_and_log_hs_ipv4_connectivity 2 1 + + # Restore the previous adjacencies. + set_end_x_nextcsid 3 4 + set_end_x_nextcsid 4 1 } __nextcsid_end_x_behavior_test() @@ -1093,7 +1100,7 @@ rt_x_nextcsid_end_x_behavior_test() local nsname local ret - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}" ret="$?" diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh index 28a775654b92..6a68c7eff1dc 100755 --- a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh @@ -166,10 +166,8 @@ # hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d) # -# Kselftest framework requirement - SKIP code is 4. 
-readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly VRF_TID=100 readonly VRF_DEVNAME="vrf-${VRF_TID}" readonly RT2HS_DEVNAME="veth-t${VRF_TID}" @@ -248,32 +246,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -282,8 +266,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -292,29 +275,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - + cleanup_all_ns # check whether the setup phase was completed successfully or not. In # case of an error during the setup phase of the testing environment, # the selftest is considered as "skipped". 
@@ -334,10 +300,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -369,7 +335,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -387,9 +353,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -403,7 +366,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -432,7 +395,7 @@ setup_rt_local_sids() dev "${VRF_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behavior instaces are grouped together in the 'localsid' + # Endpoint behavior instances are grouped together in the 'localsid' # table. 
ip -netns "${nsname}" -6 rule \ add to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -469,7 +432,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -516,8 +479,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -555,11 +518,6 @@ setup_hs() ip netns exec "${rtname}" \ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1 - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. - ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 - ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode" } @@ -656,7 +614,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -679,7 +637,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -691,7 +649,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh index 
cb4177d41b21..0979b5316fdf 100755 --- a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh +++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh @@ -116,10 +116,8 @@ # hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b) # -# Kselftest framework requirement - SKIP code is 4. -readonly ksft_skip=4 +source lib.sh -readonly RDMSUFF="$(mktemp -u XXXXXXXX)" readonly DUMMY_DEVNAME="dum0" readonly RT2HS_DEVNAME="veth-hs" readonly HS_VETH_NAME="veth0" @@ -199,32 +197,18 @@ test_command_or_ksft_skip() fi } -get_nodename() -{ - local name="$1" - - echo "${name}-${RDMSUFF}" -} - get_rtname() { local rtid="$1" - get_nodename "rt-${rtid}" + echo "rt_${rtid}" } get_hsname() { local hsid="$1" - get_nodename "hs-${hsid}" -} - -__create_namespace() -{ - local name="$1" - - ip netns add "${name}" + echo "hs_${hsid}" } create_router() @@ -233,8 +217,7 @@ create_router() local nsname nsname="$(get_rtname "${rtid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } create_host() @@ -243,28 +226,12 @@ create_host() local nsname nsname="$(get_hsname "${hsid}")" - - __create_namespace "${nsname}" + setup_ns "${nsname}" } cleanup() { - local nsname - local i - - # destroy routers - for i in ${ROUTERS}; do - nsname="$(get_rtname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done - - # destroy hosts - for i in ${HOSTS}; do - nsname="$(get_hsname "${i}")" - - ip netns del "${nsname}" &>/dev/null || true - done + cleanup_all_ns # check whether the setup phase was completed successfully or not. 
In # case of an error during the setup phase of the testing environment, @@ -285,10 +252,10 @@ add_link_rt_pairs() local nsname local neigh_nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do - neigh_nsname="$(get_rtname "${neigh}")" + eval neigh_nsname=\${$(get_rtname "${neigh}")} ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \ type veth peer name "veth-rt-${neigh}-${rt}" \ @@ -320,7 +287,7 @@ setup_rt_networking() local devname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -341,9 +308,6 @@ setup_rt_networking() ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1 - - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0 - ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0 ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1 } @@ -357,7 +321,7 @@ setup_rt_local_sids() local nsname local neigh - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} for neigh in ${rt_neighs}; do devname="veth-rt-${rt}-${neigh}" @@ -379,7 +343,7 @@ setup_rt_local_sids() encap seg6local action End dev "${DUMMY_DEVNAME}" # all SIDs for VPNs start with a common locator. Routes and SRv6 - # Endpoint behaviors instaces are grouped together in the 'localsid' + # Endpoint behaviors instances are grouped together in the 'localsid' # table. 
ip -netns "${nsname}" -6 rule add \ to "${VPN_LOCATOR_SERVICE}::/16" \ @@ -407,7 +371,7 @@ __setup_rt_policy() local policy='' local n - nsname="$(get_rtname "${encap_rt}")" + eval nsname=\${$(get_rtname "${encap_rt}")} for n in ${end_rts}; do policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC}," @@ -446,7 +410,7 @@ setup_decap() local rt="$1" local nsname - nsname="$(get_rtname "${rt}")" + eval nsname=\${$(get_rtname "${rt}")} # Local End.DX2 behavior ip -netns "${nsname}" -6 route \ @@ -463,8 +427,8 @@ setup_hs() local hsname local rtname - hsname="$(get_hsname "${hs}")" - rtname="$(get_rtname "${rt}")" + eval hsname=\${$(get_hsname "${hs}")} + eval rtname=\${$(get_rtname "${rt}")} ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0 ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0 @@ -486,11 +450,6 @@ setup_hs() add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}" ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up - - # disable the rp_filter otherwise the kernel gets confused about how - # to route decap ipv4 packets. 
- ip netns exec "${rtname}" \ - sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0 } # set an auto-generated mac address @@ -508,7 +467,7 @@ set_mac_address() local ifname="$4" local nsname - nsname=$(get_nodename "${nodename}") + eval nsname=\${${nodename}} ip -netns "${nsname}" link set dev "${ifname}" down @@ -532,7 +491,7 @@ set_host_l2peer() local hssrc_name local ipaddr - hssrc_name="$(get_hsname "${hssrc}")" + eval hssrc_name=\${$(get_hsname "${hssrc}")} if [ "${proto}" -eq 6 ]; then ipaddr="${ipprefix}::${hsdst}" @@ -562,7 +521,7 @@ setup_l2vpn() local rtdst="${hsdst}" # set fixed mac for source node and the neigh MAC address - set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" + set_mac_address "hs_${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}" set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6 set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4 @@ -570,7 +529,7 @@ setup_l2vpn() # to the mac address of the remote peer (L2 VPN destination host). # Otherwise, traffic coming from the source host is dropped at the # ingress router. 
- set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" + set_mac_address "rt_${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}" # set the SRv6 Policies at the ingress router setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \ @@ -647,7 +606,7 @@ check_rt_connectivity() local prefix local rtsrc_nsname - rtsrc_nsname="$(get_rtname "${rtsrc}")" + eval rtsrc_nsname=\${$(get_rtname "${rtsrc}")} prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")" @@ -670,7 +629,7 @@ check_hs_ipv6_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1 @@ -682,7 +641,7 @@ check_hs_ipv4_connectivity() local hsdst="$2" local hssrc_nsname - hssrc_nsname="$(get_hsname "${hssrc}")" + eval hssrc_nsname=\${$(get_hsname "${hssrc}")} ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1 diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c index 247c3b3ac1c9..9ec1c9b50e77 100644 --- a/tools/testing/selftests/net/tap.c +++ b/tools/testing/selftests/net/tap.c @@ -17,7 +17,7 @@ #include <linux/virtio_net.h> #include <netinet/ip.h> #include <netinet/udp.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" static const char param_dev_tap_name[] = "xmacvtap0"; static const char param_dev_dummy_name[] = "xdummy0"; diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile index 522d991e310e..5b0205c70c39 100644 --- a/tools/testing/selftests/net/tcp_ao/Makefile +++ b/tools/testing/selftests/net/tcp_ao/Makefile @@ -26,12 +26,13 @@ LIB := $(LIBDIR)/libaotst.a LDLIBS += $(LIB) -pthread LIBDEPS := lib/aolib.h Makefile -CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing +CFLAGS += -Wall -O2 -g -fno-strict-aliasing CFLAGS += 
$(KHDR_INCLUDES) CFLAGS += -iquote ./lib/ -I ../../../../include/ # Library -LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c +LIBSRC := ftrace.c ftrace-tcp.c kconfig.c netlink.c +LIBSRC += proc.c repair.c setup.c sock.c utils.c LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o) EXTRA_CLEAN += $(LIBOBJ) $(LIB) diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c index a1e6e007c291..6736484996a3 100644 --- a/tools/testing/selftests/net/tcp_ao/bench-lookups.c +++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c @@ -355,6 +355,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(30, server_fn, client_fn); + test_init(31, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index d3277a9de987..971cb6fa2d63 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -1,10 +1,11 @@ CONFIG_CRYPTO_HMAC=y CONFIG_CRYPTO_RMD160=y CONFIG_CRYPTO_SHA1=y -CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_IPV6=y +CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_L3_MASTER_DEV=y CONFIG_NET_VRF=y CONFIG_TCP_AO=y CONFIG_TCP_MD5SIG=y +CONFIG_TRACEPOINTS=y CONFIG_VETH=m diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c index 185a2f6e5ff3..93b61e9a36f1 100644 --- a/tools/testing/selftests/net/tcp_ao/connect-deny.c +++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c @@ -4,6 +4,7 @@ #include "aolib.h" #define fault(type) (inj == FAULT_ ## type) +static volatile int sk_pair; static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen, union tcp_addr in_addr, uint8_t prefix, @@ -34,10 +35,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, const char *cnt_name, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters 
cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; int lsk, err, sk = 0; - time_t timeout; lsk = test_listen_socket(this_ip_addr, port, 1); @@ -46,21 +47,24 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -71,12 +75,14 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, } } - if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* before counter checks */ + if (pwd && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); + if (pwd) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (!cnt_name) goto out; @@ -84,10 +90,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *pwd, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= 
before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -107,7 +113,7 @@ static void *server_fn(void *arg) try_accept("Non-AO server + AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, - "TCPAOKeyNotFound", 0, FAULT_TIMEOUT); + "TCPAOKeyNotFound", TEST_CNT_NS_KEY_NOT_FOUND, FAULT_TIMEOUT); try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, @@ -133,8 +139,9 @@ static void *server_fn(void *arg) wrong_addr, -1, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT); + /* Key rejected by the other side, failing short through skpair */ try_accept("Client: Wrong addr", port++, NULL, - this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT); + this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_KEYREJECT); try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 200, 100, 0, @@ -161,8 +168,7 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, test_cnt cnt_expected, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; - time_t timeout; + struct tcp_counters cnt1, cnt2; int sk, ret; sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); @@ -172,15 +178,15 @@ static void try_connect(const char *tst_name, unsigned int port, if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (pwd && test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, cnt_expected, &sk_pair); + synchronize_threads(); /* before counter checks */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) { test_ok("%s: connect() was prevented", tst_name); } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) { @@ -199,9 +205,11 @@ static void try_connect(const char *tst_name, unsigned int port, else test_ok("%s: connected", tst_name); if (pwd && ret > 0) { - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); + } else if (pwd) { + test_tcp_counters_free(&cnt1); } out: synchronize_threads(); /* close() */ @@ -212,30 +220,49 @@ out: static void *client_fn(void *arg) { - union tcp_addr wrong_addr, network_addr; + union tcp_addr wrong_addr, network_addr, addr_any = {}; unsigned int port = test_server_port; if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1) test_error("Can't convert ip address %s", TEST_WRONG_IP); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server + Non-AO client", port++, NULL, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + 
trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + /* + * XXX: The test doesn't increase any counters, see tcp_make_synack(). + * Potentially, it can be speed up by setting sk_pair = -ETIMEDOUT + * but the price would be increased complexity of the tracer thread. + */ + trace_ao_event_sk_expect(TCP_AO_SYNACK_NO_KEY, this_ip_dest, addr_any, + port, 0, 100, 100); try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_WRONG_MACLEN, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT); @@ -259,6 +286,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(21, server_fn, client_fn); + test_init(22, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c index 81653b47f303..340f00e979ea 100644 --- a/tools/testing/selftests/net/tcp_ao/connect.c +++ b/tools/testing/selftests/net/tcp_ao/connect.c @@ -35,7 +35,7 @@ static void *client_fn(void *arg) uint64_t before_aogood, after_aogood; const size_t nr_packets = 20; struct netstat *ns_before, *ns_after; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters ao1, ao2; if (sk < 0) test_error("socket()"); @@ -50,41 +50,41 @@ static void *client_fn(void *arg) ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - 
test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao1)) + test_error("test_get_tcp_counters()"); - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("verify failed"); return NULL; } ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &ao2)) + test_error("test_get_tcp_counters()"); netstat_print_diff(ns_before, ns_after); netstat_free(ns_before); netstat_free(ns_after); if (nr_packets > (after_aogood - before_aogood)) { - test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)", + test_fail("TCPAOGood counter mismatch: %zu > (%" PRIu64 " - %" PRIu64 ")", nr_packets, after_aogood, before_aogood); return NULL; } - if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD)) + if (test_assert_counters("connect", &ao1, &ao2, TEST_CNT_GOOD)) return NULL; - test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64, - before_aogood, ao1.ao_info_pkt_good, - ao1.key_cnts[0].pkt_good, - after_aogood, ao2.ao_info_pkt_good, - ao2.key_cnts[0].pkt_good, + test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %zu", + before_aogood, ao1.ao.ao_info_pkt_good, + ao1.ao.key_cnts[0].pkt_good, + after_aogood, ao2.ao.ao_info_pkt_good, + ao2.ao.key_cnts[0].pkt_good, nr_packets); return NULL; } int main(int argc, char *argv[]) { - test_init(1, server_fn, client_fn); + test_init(2, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c index d69bcba3c929..85c1a1e958c6 100644 --- a/tools/testing/selftests/net/tcp_ao/icmps-discard.c +++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c @@ -53,7 +53,7 @@ static void 
serve_interfered(int sk) ssize_t test_quota = packet_size * packets_nr * 10; uint64_t dest_unreach_a, dest_unreach_b; uint64_t icmp_ignored_a, icmp_ignored_b; - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; bool counter_not_found; struct netstat *ns_after, *ns_before; ssize_t bytes; @@ -61,16 +61,16 @@ static void serve_interfered(int sk) ns_before = netstat_read(); dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL); icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL); - if (test_get_tcp_ao_counters(sk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, test_quota, 0); ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL); icmp_ignored_b = netstat_get(ns_after, tcpao_icmps, &counter_not_found); - if (test_get_tcp_ao_counters(sk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); netstat_free(ns_before); netstat_free(ns_after); @@ -91,9 +91,9 @@ static void serve_interfered(int sk) return; } #ifdef TEST_ICMPS_ACCEPT - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); #else - test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP); #endif if (icmp_ignored_a >= icmp_ignored_b) { test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64, @@ -395,7 +395,6 @@ static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *d static void send_interfered(int sk) { - const unsigned int timeout = TEST_TIMEOUT_SEC; struct sockaddr_in6 src, dst; socklen_t addr_sz; @@ -409,7 +408,7 @@ static void send_interfered(int sk) while (1) { uint32_t rcv_nxt; - 
if (test_client_verify(sk, packet_size, packets_nr, timeout)) { + if (test_client_verify(sk, packet_size, packets_nr)) { test_fail("client: connection is broken"); return; } @@ -444,6 +443,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(3, server_fn, client_fn); + test_init(4, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index 24e62120b792..69d9a7a05d5c 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -629,11 +629,11 @@ static int key_collection_socket(bool server, unsigned int port) } static void verify_counters(const char *tst_name, bool is_listen_sk, bool server, - struct tcp_ao_counters *a, struct tcp_ao_counters *b) + struct tcp_counters *a, struct tcp_counters *b) { unsigned int i; - __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD); + test_assert_counters_sk(tst_name, a, b, TEST_CNT_GOOD); for (i = 0; i < collection.nr_keys; i++) { struct test_key *key = &collection.keys[i]; @@ -652,12 +652,12 @@ static void verify_counters(const char *tst_name, bool is_listen_sk, bool server rx_cnt_expected = key->used_on_server_tx; } - test_tcp_ao_key_counters_cmp(tst_name, a, b, - rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0, - sndid, rcvid); + test_assert_counters_key(tst_name, &a->ao, &b->ao, + rx_cnt_expected ? 
TEST_CNT_KEY_GOOD : 0, + sndid, rcvid); } - test_tcp_ao_counters_free(a); - test_tcp_ao_counters_free(b); + test_tcp_counters_free(a); + test_tcp_counters_free(b); test_ok("%s: passed counters checks", tst_name); } @@ -791,17 +791,17 @@ out: } static int start_server(const char *tst_name, unsigned int port, size_t quota, - struct tcp_ao_counters *begin, + struct tcp_counters *begin, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters lsk_c1, lsk_c2; + struct tcp_counters lsk_c1, lsk_c2; ssize_t bytes; int sk, lsk; synchronize_threads(); /* 1: key collection initialized */ lsk = key_collection_socket(true, port); - if (test_get_tcp_ao_counters(lsk, &lsk_c1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) test_error("test_wait_fd()"); @@ -809,12 +809,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, sk = accept(lsk, NULL, NULL); if (sk < 0) test_error("accept()"); - if (test_get_tcp_ao_counters(sk, begin)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, begin)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: accepted => send data */ - if (test_get_tcp_ao_counters(lsk, &lsk_c2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &lsk_c2)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, lsk, true, true); close(lsk); @@ -830,12 +830,12 @@ static int start_server(const char *tst_name, unsigned int port, size_t quota, } static void end_server(const char *tst_name, int sk, - struct tcp_ao_counters *begin) + struct tcp_counters *begin) { - struct tcp_ao_counters end; + struct tcp_counters end; - if (test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &end)) + 
test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, true); synchronize_threads(); /* 4: verified => closed */ @@ -848,7 +848,7 @@ static void end_server(const char *tst_name, int sk, static void try_server_run(const char *tst_name, unsigned int port, size_t quota, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_server(tst_name, port, quota, &tmp, @@ -860,7 +860,7 @@ static void server_rotations(const char *tst_name, unsigned int port, size_t quota, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -886,7 +886,7 @@ static void server_rotations(const char *tst_name, unsigned int port, static int run_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { int sk; @@ -904,8 +904,8 @@ static int run_client(const char *tst_name, unsigned int port, if (test_set_key(sk, sndid, rcvid)) test_error("failed to set current/rnext keys"); } - if (before && test_get_tcp_ao_counters(sk, before)) - test_error("test_get_tcp_ao_counters()"); + if (before && test_get_tcp_counters(sk, before)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 2: MKTs added => connect() */ if (test_connect_socket(sk, this_ip_dest, port++) <= 0) @@ -918,11 +918,11 @@ static int run_client(const char *tst_name, unsigned int port, collection.keys[rnext_index].used_on_server_tx = 1; synchronize_threads(); /* 3: accepted => send data */ - if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_sz, msg_nr)) { test_fail("verify failed"); close(sk); if (before) - test_tcp_ao_counters_free(before); + test_tcp_counters_free(before); return -1; } @@ -931,7 +931,7 @@ static int 
run_client(const char *tst_name, unsigned int port, static int start_client(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *before, + struct tcp_counters *before, const size_t msg_sz, const size_t msg_nr) { if (init_default_key_collection(nr_keys, true)) @@ -943,9 +943,9 @@ static int start_client(const char *tst_name, unsigned int port, static void end_client(const char *tst_name, int sk, unsigned int nr_keys, int current_index, int rnext_index, - struct tcp_ao_counters *start) + struct tcp_counters *start) { - struct tcp_ao_counters end; + struct tcp_counters end; /* Some application may become dependent on this kernel choice */ if (current_index < 0) @@ -955,8 +955,8 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, verify_current_rnext(tst_name, sk, collection.keys[current_index].client_keyid, collection.keys[rnext_index].server_keyid); - if (start && test_get_tcp_ao_counters(sk, &end)) - test_error("test_get_tcp_ao_counters()"); + if (start && test_get_tcp_counters(sk, &end)) + test_error("test_get_tcp_counters()"); verify_keys(tst_name, sk, false, false); synchronize_threads(); /* 4: verify => closed */ close(sk); @@ -965,7 +965,7 @@ static void end_client(const char *tst_name, int sk, unsigned int nr_keys, synchronize_threads(); /* 5: counters */ } -static void try_unmatched_keys(int sk, int *rnext_index) +static void try_unmatched_keys(int sk, int *rnext_index, unsigned int port) { struct test_key *key; unsigned int i = 0; @@ -1013,7 +1013,10 @@ static void try_unmatched_keys(int sk, int *rnext_index) test_error("all keys on server match the client"); if (test_set_key(sk, -1, key->server_keyid)) test_error("Can't change the current key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + -1, key->server_keyid, -1); + if 
(test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); *rnext_index = i; } @@ -1045,7 +1048,7 @@ static void check_current_back(const char *tst_name, unsigned int port, unsigned int current_index, unsigned int rnext_index, unsigned int rotate_to_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, @@ -1054,7 +1057,11 @@ static void check_current_back(const char *tst_name, unsigned int port, return; if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1)) test_error("Can't change the current key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, + collection.keys[rotate_to_index].client_keyid, + collection.keys[current_index].client_keyid, -1); + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("verify failed"); /* There is a race here: between setting the current_key with * setsockopt(TCP_AO_INFO) and starting to send some data - there @@ -1074,7 +1081,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, unsigned int nr_keys, unsigned int rotations, unsigned int current_index, unsigned int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; unsigned int i; int sk; @@ -1085,12 +1092,17 @@ static void roll_over_keys(const char *tst_name, unsigned int port, for (i = rnext_index + 1; rotations > 0; i++, rotations--) { if (i >= collection.nr_keys) i = 0; + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, + this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, + i == 0 ? 
-1 : collection.keys[i - 1].server_keyid, + collection.keys[i].server_keyid, -1); if (test_set_key(sk, -1, collection.keys[i].server_keyid)) test_error("Can't change the Rnext key"); - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("verify failed"); close(sk); - test_tcp_ao_counters_free(&tmp); + test_tcp_counters_free(&tmp); return; } verify_current_rnext(tst_name, sk, -1, @@ -1104,7 +1116,7 @@ static void roll_over_keys(const char *tst_name, unsigned int port, static void try_client_run(const char *tst_name, unsigned int port, unsigned int nr_keys, int current_index, int rnext_index) { - struct tcp_ao_counters tmp; + struct tcp_counters tmp; int sk; sk = start_client(tst_name, port, nr_keys, current_index, rnext_index, @@ -1124,7 +1136,7 @@ static void try_client_match(const char *tst_name, unsigned int port, rnext_index, msg_len, nr_packets); if (sk < 0) return; - try_unmatched_keys(sk, &rnext_index); + try_unmatched_keys(sk, &rnext_index, port); end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL); } @@ -1181,6 +1193,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(120, server_fn, client_fn); + test_init(121, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h index fbc7f6111815..ebb2899c12fe 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/aolib.h +++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h @@ -37,17 +37,59 @@ extern void __test_xfail(const char *buf); extern void __test_error(const char *buf); extern void __test_skip(const char *buf); -__attribute__((__format__(__printf__, 2, 3))) -static inline void __test_print(void (*fn)(const char *), const char *fmt, ...) 
+static inline char *test_snprintf(const char *fmt, va_list vargs) { -#define TEST_MSG_BUFFER_SIZE 4096 - char buf[TEST_MSG_BUFFER_SIZE]; - va_list arg; - - va_start(arg, fmt); - vsnprintf(buf, sizeof(buf), fmt, arg); - va_end(arg); - fn(buf); + char *ret = NULL; + size_t size = 0; + va_list tmp; + int n = 0; + + va_copy(tmp, vargs); + n = vsnprintf(ret, size, fmt, tmp); + va_end(tmp); + if (n < 0) + return NULL; + + size = n + 1; + ret = malloc(size); + if (!ret) + return NULL; + + n = vsnprintf(ret, size, fmt, vargs); + if (n < 0 || n > size - 1) { + free(ret); + return NULL; + } + return ret; +} + +static __printf(1, 2) inline char *test_sprintf(const char *fmt, ...) +{ + va_list vargs; + char *ret; + + va_start(vargs, fmt); + ret = test_snprintf(fmt, vargs); + va_end(vargs); + + return ret; +} + +static __printf(2, 3) inline void __test_print(void (*fn)(const char *), + const char *fmt, ...) +{ + va_list vargs; + char *msg; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + + if (!msg) + return; + + fn(msg); + free(msg); } #define test_print(fmt, ...) \ @@ -103,6 +145,7 @@ enum test_needs_kconfig { KCONFIG_TCP_AO, /* required */ KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */ KCONFIG_NET_VRF, /* optional, for L3/VRF testing */ + KCONFIG_FTRACE, /* optional, for tracepoints checks */ __KCONFIG_LAST__ }; extern bool kernel_config_has(enum test_needs_kconfig k); @@ -142,6 +185,8 @@ static inline void test_init2(unsigned int ntests, __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2); } extern void test_add_destructor(void (*d)(void)); +extern void test_init_ftrace(int nsfd1, int nsfd2); +extern int test_setup_tracing(void); /* To adjust optmem socket limit, approximately estimate a number, * that is bigger than sizeof(struct tcp_ao_key). 
@@ -216,12 +261,17 @@ static inline void test_init(unsigned int ntests, } extern void synchronize_threads(void); extern void switch_ns(int fd); +extern int switch_save_ns(int fd); +extern void switch_close_ns(int fd); extern __thread union tcp_addr this_ip_addr; extern __thread union tcp_addr this_ip_dest; extern int test_family; extern void randomize_buffer(void *buf, size_t buflen); +extern __printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...); + extern int open_netns(void); extern int unshare_open_netns(void); extern const char veth_name[]; @@ -239,7 +289,7 @@ extern int link_set_up(const char *intf); extern const unsigned int test_server_port; extern int test_wait_fd(int sk, time_t sec, bool write); extern int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout); + void *addr, size_t addr_sz, bool async); extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz); static inline int test_listen_socket(const union tcp_addr taddr, @@ -281,25 +331,26 @@ static inline int test_listen_socket(const union tcp_addr taddr, * If set to 0 - kernel will try to retransmit SYN number of times, set in * /proc/sys/net/ipv4/tcp_syn_retries * By default set to 1 to make tests pass faster on non-busy machine. 
+ * [in process of removal, don't use in new tests] */ #ifndef TEST_RETRANSMIT_SEC #define TEST_RETRANSMIT_SEC 1 #endif static inline int _test_connect_socket(int sk, const union tcp_addr taddr, - unsigned int port, time_t timeout) + unsigned int port, bool async) { sockaddr_af addr; tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); return __test_connect_socket(sk, veth_name, - (void *)&addr, sizeof(addr), timeout); + (void *)&addr, sizeof(addr), async); } static inline int test_connect_socket(int sk, const union tcp_addr taddr, unsigned int port) { - return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC); + return _test_connect_socket(sk, taddr, port, false); } extern int __test_set_md5(int sk, void *addr, size_t addr_sz, @@ -433,10 +484,7 @@ static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps) } extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec); -extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec); -extern int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec); +extern int test_client_verify(int sk, const size_t msg_len, const size_t nr); struct tcp_ao_key_counters { uint8_t sndid; @@ -462,7 +510,15 @@ struct tcp_ao_counters { size_t nr_keys; struct tcp_ao_key_counters *key_cnts; }; -extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); + +struct tcp_counters { + struct tcp_ao_counters ao; + uint64_t netns_md5_notfound; + uint64_t netns_md5_unexpected; + uint64_t netns_md5_failure; +}; + +extern int test_get_tcp_counters(int sk, struct tcp_counters *out); #define TEST_CNT_KEY_GOOD BIT(0) #define TEST_CNT_KEY_BAD BIT(1) @@ -476,8 +532,31 @@ extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out); #define TEST_CNT_NS_KEY_NOT_FOUND BIT(9) #define TEST_CNT_NS_AO_REQUIRED BIT(10) #define TEST_CNT_NS_DROPPED_ICMP BIT(11) +#define TEST_CNT_NS_MD5_NOT_FOUND BIT(12) +#define 
TEST_CNT_NS_MD5_UNEXPECTED BIT(13) +#define TEST_CNT_NS_MD5_FAILURE BIT(14) typedef uint16_t test_cnt; +#define _for_each_counter(f) \ +do { \ + /* per-netns */ \ + f(ao.netns_ao_good, TEST_CNT_NS_GOOD); \ + f(ao.netns_ao_bad, TEST_CNT_NS_BAD); \ + f(ao.netns_ao_key_not_found, TEST_CNT_NS_KEY_NOT_FOUND); \ + f(ao.netns_ao_required, TEST_CNT_NS_AO_REQUIRED); \ + f(ao.netns_ao_dropped_icmp, TEST_CNT_NS_DROPPED_ICMP); \ + /* per-socket */ \ + f(ao.ao_info_pkt_good, TEST_CNT_SOCK_GOOD); \ + f(ao.ao_info_pkt_bad, TEST_CNT_SOCK_BAD); \ + f(ao.ao_info_pkt_key_not_found, TEST_CNT_SOCK_KEY_NOT_FOUND); \ + f(ao.ao_info_pkt_ao_required, TEST_CNT_SOCK_AO_REQUIRED); \ + f(ao.ao_info_pkt_dropped_icmp, TEST_CNT_SOCK_DROPPED_ICMP); \ + /* non-AO */ \ + f(netns_md5_notfound, TEST_CNT_NS_MD5_NOT_FOUND); \ + f(netns_md5_unexpected, TEST_CNT_NS_MD5_UNEXPECTED); \ + f(netns_md5_failure, TEST_CNT_NS_MD5_FAILURE); \ +} while (0) + #define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD) #define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD) #define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \ @@ -489,34 +568,71 @@ typedef uint16_t test_cnt; #define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD) #define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD) -extern int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, struct tcp_ao_counters *after, +extern test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after); +extern int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, struct tcp_counters *after, test_cnt expected); -extern int test_tcp_ao_key_counters_cmp(const char *tst_name, +extern int test_assert_counters_key(const char *tst_name, struct tcp_ao_counters *before, struct tcp_ao_counters *after, test_cnt expected, int sndid, int rcvid); -extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts); +extern void test_tcp_counters_free(struct tcp_counters 
*cnts); + +/* + * Polling for netns and socket counters during select()/connect() and also + * client/server messaging. Instead of constant timeout on underlying select(), + * check the counters and return early. This allows to pass the tests where + * timeout is expected without waiting for that fixing timeout (tests speed-up). + * Previously shorter timeouts were used for tests expecting to time out, + * but that leaded to sporadic false positives on counter checks failures, + * as one second timeouts aren't enough for TCP retransmit. + * + * Two sides of the socketpair (client/server) should synchronize failures + * using a shared variable *err, so that they can detect the other side's + * failure. + */ +extern int test_skpair_wait_poll(int sk, bool write, test_cnt cond, + volatile int *err); +extern int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt cond, volatile int *err); +static inline int test_skpair_connect_poll(int sk, const union tcp_addr taddr, + unsigned int port, + test_cnt cond, volatile int *err) +{ + sockaddr_af addr; + + tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port)); + return _test_skpair_connect_poll(sk, veth_name, + (void *)&addr, sizeof(addr), cond, err); +} + +extern int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err); +extern int test_skpair_server(int sk, ssize_t quota, + test_cnt cond, volatile int *err); + /* - * Frees buffers allocated in test_get_tcp_ao_counters(). + * Frees buffers allocated in test_get_tcp_counters(). * The function doesn't expect new keys or keys removed between calls - * to test_get_tcp_ao_counters(). Check key counters manually if they + * to test_get_tcp_counters(). Check key counters manually if they * may change. 
*/ -static inline int test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +static inline int test_assert_counters(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) { int ret; - ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected); + ret = test_assert_counters_sk(tst_name, before, after, expected); if (ret) goto out; - ret = test_tcp_ao_key_counters_cmp(tst_name, before, after, - expected, -1, -1); + ret = test_assert_counters_key(tst_name, &before->ao, &after->ao, + expected, -1, -1); out: - test_tcp_ao_counters_free(before); - test_tcp_ao_counters_free(after); + test_tcp_counters_free(before); + test_tcp_counters_free(after); return ret; } @@ -602,4 +718,115 @@ static inline int test_add_repaired_key(int sk, return test_verify_socket_key(sk, &tmp); } +#define DEFAULT_FTRACE_BUFFER_KB 10000 +#define DEFAULT_TRACER_LINES_ARR 200 +struct test_ftracer; +extern uint64_t ns_cookie1, ns_cookie2; + +enum ftracer_op { + FTRACER_LINE_DISCARD = 0, + FTRACER_LINE_PRESERVE, + FTRACER_EXIT, +}; + +extern struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb); +extern int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter); +extern void destroy_ftracer(struct test_ftracer *tracer); +extern const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer); +extern const char **tracer_get_savedlines(struct test_ftracer *tracer); + +enum trace_events { + /* TCP_HASH_EVENT */ + TCP_HASH_BAD_HEADER = 0, + TCP_HASH_MD5_REQUIRED, + TCP_HASH_MD5_UNEXPECTED, + TCP_HASH_MD5_MISMATCH, + TCP_HASH_AO_REQUIRED, + /* TCP_AO_EVENT */ + TCP_AO_HANDSHAKE_FAILURE, + TCP_AO_WRONG_MACLEN, + TCP_AO_MISMATCH, + TCP_AO_KEY_NOT_FOUND, + 
TCP_AO_RNEXT_REQUEST, + /* TCP_AO_EVENT_SK */ + TCP_AO_SYNACK_NO_KEY, + /* TCP_AO_EVENT_SNE */ + TCP_AO_SND_SNE_UPDATE, + TCP_AO_RCV_SNE_UPDATE, + __MAX_TRACE_EVENTS +}; + +extern int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne); + +static inline void trace_hash_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + -1, -1, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, L3index, + fin, syn, rst, psh, ack, + keyid, rnext, maclen, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sk_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, + int keyid, int rnext) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + keyid, rnext, -1, -1); + if (err) + test_error("Couldn't add a trace event: %d", err); +} + +static inline void trace_ao_event_sne_expect(enum trace_events type, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int sne) +{ + int err; + + err = __trace_event_expect(type, TEST_FAMILY, src, dst, + src_port, dst_port, -1, + -1, -1, -1, -1, -1, + -1, -1, -1, sne); + if (err) + 
test_error("Couldn't add a trace event: %d", err); +} + +extern int setup_aolib_ftracer(void); + #endif /* _AOLIB_H_ */ diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c new file mode 100644 index 000000000000..27403f875054 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace-tcp.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include "aolib.h" + +static const char *trace_event_names[__MAX_TRACE_EVENTS] = { + /* TCP_HASH_EVENT */ + "tcp_hash_bad_header", + "tcp_hash_md5_required", + "tcp_hash_md5_unexpected", + "tcp_hash_md5_mismatch", + "tcp_hash_ao_required", + /* TCP_AO_EVENT */ + "tcp_ao_handshake_failure", + "tcp_ao_wrong_maclen", + "tcp_ao_mismatch", + "tcp_ao_key_not_found", + "tcp_ao_rnext_request", + /* TCP_AO_EVENT_SK */ + "tcp_ao_synack_no_key", + /* TCP_AO_EVENT_SNE */ + "tcp_ao_snd_sne_update", + "tcp_ao_rcv_sne_update" +}; + +struct expected_trace_point { + /* required */ + enum trace_events type; + int family; + union tcp_addr src; + union tcp_addr dst; + + /* optional */ + int src_port; + int dst_port; + int L3index; + + int fin; + int syn; + int rst; + int psh; + int ack; + + int keyid; + int rnext; + int maclen; + int sne; + + size_t matched; +}; + +static struct expected_trace_point *exp_tps; +static size_t exp_tps_nr; +static size_t exp_tps_size; +static pthread_mutex_t exp_tps_mutex = PTHREAD_MUTEX_INITIALIZER; + +int __trace_event_expect(enum trace_events type, int family, + union tcp_addr src, union tcp_addr dst, + int src_port, int dst_port, int L3index, + int fin, int syn, int rst, int psh, int ack, + int keyid, int rnext, int maclen, int sne) +{ + struct expected_trace_point new_tp = { + .type = type, + .family = family, + .src = src, + .dst = dst, + .src_port = src_port, + .dst_port = dst_port, + .L3index = L3index, + .fin = fin, + .syn = syn, + .rst = rst, + .psh = psh, + .ack = ack, + .keyid = keyid, 
+ .rnext = rnext, + .maclen = maclen, + .sne = sne, + .matched = 0, + }; + int ret = 0; + + if (!kernel_config_has(KCONFIG_FTRACE)) + return 0; + + pthread_mutex_lock(&exp_tps_mutex); + if (exp_tps_nr == exp_tps_size) { + struct expected_trace_point *tmp; + + if (exp_tps_size == 0) + exp_tps_size = 10; + else + exp_tps_size = exp_tps_size * 1.6; + + tmp = reallocarray(exp_tps, exp_tps_size, sizeof(exp_tps[0])); + if (!tmp) { + ret = -ENOMEM; + goto out; + } + exp_tps = tmp; + } + exp_tps[exp_tps_nr] = new_tp; + exp_tps_nr++; +out: + pthread_mutex_unlock(&exp_tps_mutex); + return ret; +} + +static void free_expected_events(void) +{ + /* We're from the process destructor - not taking the mutex */ + exp_tps_size = 0; + exp_tps = NULL; + free(exp_tps); +} + +struct trace_point { + int family; + union tcp_addr src; + union tcp_addr dst; + unsigned int src_port; + unsigned int dst_port; + int L3index; + unsigned int fin:1, + syn:1, + rst:1, + psh:1, + ack:1; + + unsigned int keyid; + unsigned int rnext; + unsigned int maclen; + + unsigned int sne; +}; + +static bool lookup_expected_event(int event_type, struct trace_point *e) +{ + size_t i; + + pthread_mutex_lock(&exp_tps_mutex); + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *p = &exp_tps[i]; + size_t sk_size; + + if (p->type != event_type) + continue; + if (p->family != e->family) + continue; + if (p->family == AF_INET) + sk_size = sizeof(p->src.a4); + else + sk_size = sizeof(p->src.a6); + if (memcmp(&p->src, &e->src, sk_size)) + continue; + if (memcmp(&p->dst, &e->dst, sk_size)) + continue; + if (p->src_port >= 0 && p->src_port != e->src_port) + continue; + if (p->dst_port >= 0 && p->dst_port != e->dst_port) + continue; + if (p->L3index >= 0 && p->L3index != e->L3index) + continue; + + if (p->fin >= 0 && p->fin != e->fin) + continue; + if (p->syn >= 0 && p->syn != e->syn) + continue; + if (p->rst >= 0 && p->rst != e->rst) + continue; + if (p->psh >= 0 && p->psh != e->psh) + continue; + if (p->ack 
>= 0 && p->ack != e->ack) + continue; + + if (p->keyid >= 0 && p->keyid != e->keyid) + continue; + if (p->rnext >= 0 && p->rnext != e->rnext) + continue; + if (p->maclen >= 0 && p->maclen != e->maclen) + continue; + if (p->sne >= 0 && p->sne != e->sne) + continue; + p->matched++; + pthread_mutex_unlock(&exp_tps_mutex); + return true; + } + pthread_mutex_unlock(&exp_tps_mutex); + return false; +} + +static int check_event_type(const char *line) +{ + size_t i; + + /* + * This should have been a set or hashmap, but it's a selftest, + * so... KISS. + */ + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!strncmp(trace_event_names[i], line, strlen(trace_event_names[i]))) + return i; + } + return -1; +} + +static bool event_has_flags(enum trace_events event) +{ + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: + return true; + default: + return false; + } +} + +static int tracer_ip_split(int family, char *src, char **addr, char **port) +{ + char *p; + + if (family == AF_INET) { + /* fomat is <addr>:port, i.e.: 10.0.254.1:7015 */ + *addr = src; + p = strchr(src, ':'); + if (!p) { + test_print("Couldn't parse trace event addr:port %s", src); + return -EINVAL; + } + *p++ = '\0'; + *port = p; + return 0; + } + if (family != AF_INET6) + return -EAFNOSUPPORT; + + /* format is [<addr>]:port, i.e.: [2001:db8:254::1]:7013 */ + *addr = strchr(src, '['); + p = strchr(src, ']'); + + if (!p || !*addr) { + test_print("Couldn't parse trace event [addr]:port %s", src); + return -EINVAL; + } + + *addr = *addr + 1; /* '[' */ + *p++ = '\0'; /* ']' */ + if (*p != ':') { + test_print("Couldn't parse trace event :port %s", p); + return -EINVAL; + } + *p++ = '\0'; /* ':' */ + *port = p; + return 0; +} + +static int 
tracer_scan_address(int family, char *src, + union tcp_addr *dst, unsigned int *port) +{ + char *addr, *port_str; + int ret; + + ret = tracer_ip_split(family, src, &addr, &port_str); + if (ret) + return ret; + + if (inet_pton(family, addr, dst) != 1) { + test_print("Couldn't parse trace event addr %s", addr); + return -EINVAL; + } + errno = 0; + *port = (unsigned int)strtoul(port_str, NULL, 10); + if (errno != 0) { + test_print("Couldn't parse trace event port %s", port_str); + return -errno; + } + return 0; +} + +static int tracer_scan_event(const char *line, enum trace_events event, + struct trace_point *out) +{ + char *src = NULL, *dst = NULL, *family = NULL; + char fin, syn, rst, psh, ack; + int nr_matched, ret = 0; + uint64_t netns_cookie; + + switch (event) { + case TCP_HASH_BAD_HEADER: + case TCP_HASH_MD5_REQUIRED: + case TCP_HASH_MD5_UNEXPECTED: + case TCP_HASH_MD5_MISMATCH: + case TCP_HASH_AO_REQUIRED: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c]", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack); + if (nr_matched != 10) + test_print("Couldn't parse trace event, matched = %d/10", + nr_matched); + break; + } + case TCP_AO_HANDSHAKE_FAILURE: + case TCP_AO_WRONG_MACLEN: + case TCP_AO_MISMATCH: + case TCP_AO_KEY_NOT_FOUND: + case TCP_AO_RNEXT_REQUEST: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms L3index=%d [%c%c%c%c%c] keyid=%u rnext=%u maclen=%u", + &netns_cookie, &family, + &src, &dst, &out->L3index, + &fin, &syn, &rst, &psh, &ack, + &out->keyid, &out->rnext, &out->maclen); + if (nr_matched != 13) + test_print("Couldn't parse trace event, matched = %d/13", + nr_matched); + break; + } + case TCP_AO_SYNACK_NO_KEY: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms keyid=%u rnext=%u", + &netns_cookie, &family, + &src, &dst, &out->keyid, &out->rnext); + if (nr_matched != 6) + 
test_print("Couldn't parse trace event, matched = %d/6", + nr_matched); + break; + } + case TCP_AO_SND_SNE_UPDATE: + case TCP_AO_RCV_SNE_UPDATE: { + nr_matched = sscanf(line, "%*s net=%" PRIu64 " state%*s family=%ms src=%ms dest=%ms sne=%u", + &netns_cookie, &family, + &src, &dst, &out->sne); + if (nr_matched != 5) + test_print("Couldn't parse trace event, matched = %d/5", + nr_matched); + break; + } + default: + return -1; + } + + if (family) { + if (!strcmp(family, "AF_INET")) { + out->family = AF_INET; + } else if (!strcmp(family, "AF_INET6")) { + out->family = AF_INET6; + } else { + test_print("Couldn't parse trace event family %s", family); + ret = -EINVAL; + goto out_free; + } + } + + if (event_has_flags(event)) { + out->fin = (fin == 'F'); + out->syn = (syn == 'S'); + out->rst = (rst == 'R'); + out->psh = (psh == 'P'); + out->ack = (ack == '.'); + + if ((fin != 'F' && fin != ' ') || + (syn != 'S' && syn != ' ') || + (rst != 'R' && rst != ' ') || + (psh != 'P' && psh != ' ') || + (ack != '.' 
&& ack != ' ')) { + test_print("Couldn't parse trace event flags %c%c%c%c%c", + fin, syn, rst, psh, ack); + ret = -EINVAL; + goto out_free; + } + } + + if (src && tracer_scan_address(out->family, src, &out->src, &out->src_port)) { + ret = -EINVAL; + goto out_free; + } + + if (dst && tracer_scan_address(out->family, dst, &out->dst, &out->dst_port)) { + ret = -EINVAL; + goto out_free; + } + + if (netns_cookie != ns_cookie1 && netns_cookie != ns_cookie2) { + test_print("Net namespace filter for trace event didn't work: %" PRIu64 " != %" PRIu64 " OR %" PRIu64, + netns_cookie, ns_cookie1, ns_cookie2); + ret = -EINVAL; + } + +out_free: + free(src); + free(dst); + free(family); + return ret; +} + +static enum ftracer_op aolib_tracer_process_event(const char *line) +{ + int event_type = check_event_type(line); + struct trace_point tmp = {}; + + if (event_type < 0) + return FTRACER_LINE_PRESERVE; + + if (tracer_scan_event(line, event_type, &tmp)) + return FTRACER_LINE_PRESERVE; + + return lookup_expected_event(event_type, &tmp) ? + FTRACER_LINE_DISCARD : FTRACER_LINE_PRESERVE; +} + +static void dump_trace_event(struct expected_trace_point *e) +{ + char src[INET6_ADDRSTRLEN], dst[INET6_ADDRSTRLEN]; + + if (!inet_ntop(e->family, &e->src, src, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + if (!inet_ntop(e->family, &e->dst, dst, INET6_ADDRSTRLEN)) + test_error("inet_ntop()"); + test_print("trace event filter %s [%s:%d => %s:%d, L3index %d, flags: %s%s%s%s%s, keyid: %d, rnext: %d, maclen: %d, sne: %d] = %zu", + trace_event_names[e->type], + src, e->src_port, dst, e->dst_port, e->L3index, + e->fin ? "F" : "", e->syn ? "S" : "", e->rst ? "R" : "", + e->psh ? "P" : "", e->ack ? "." 
: "", + e->keyid, e->rnext, e->maclen, e->sne, e->matched); +} + +static void print_match_stats(bool unexpected_events) +{ + size_t matches_per_type[__MAX_TRACE_EVENTS] = {}; + bool expected_but_none = false; + size_t i, total_matched = 0; + char *stat_line = NULL; + + for (i = 0; i < exp_tps_nr; i++) { + struct expected_trace_point *e = &exp_tps[i]; + + total_matched += e->matched; + matches_per_type[e->type] += e->matched; + if (!e->matched) + expected_but_none = true; + } + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + if (!matches_per_type[i]) + continue; + stat_line = test_sprintf("%s%s[%zu] ", stat_line ?: "", + trace_event_names[i], + matches_per_type[i]); + if (!stat_line) + test_error("test_sprintf()"); + } + + if (unexpected_events || expected_but_none) { + for (i = 0; i < exp_tps_nr; i++) + dump_trace_event(&exp_tps[i]); + } + + if (unexpected_events) + return; + + if (expected_but_none) + test_fail("Some trace events were expected, but didn't occur"); + else if (total_matched) + test_ok("Trace events matched expectations: %zu %s", + total_matched, stat_line); + else + test_ok("No unexpected trace events during the test run"); +} + +#define dump_events(fmt, ...) 
\ + __test_print(__test_msg, fmt, ##__VA_ARGS__) +static void check_free_events(struct test_ftracer *tracer) +{ + const char **lines; + size_t nr; + + if (!kernel_config_has(KCONFIG_FTRACE)) { + test_skip("kernel config doesn't have ftrace - no checks"); + return; + } + + nr = tracer_get_savedlines_nr(tracer); + lines = tracer_get_savedlines(tracer); + print_match_stats(!!nr); + if (!nr) + return; + + errno = 0; + test_xfail("Trace events [%zu] were not expected:", nr); + while (nr) + dump_events("\t%s", lines[--nr]); +} + +static int setup_tcp_trace_events(struct test_ftracer *tracer) +{ + char *filter; + size_t i; + int ret; + + filter = test_sprintf("net_cookie == %zu || net_cookie == %zu", + ns_cookie1, ns_cookie2); + if (!filter) + return -ENOMEM; + + for (i = 0; i < __MAX_TRACE_EVENTS; i++) { + char *event_name = test_sprintf("tcp/%s", trace_event_names[i]); + + if (!event_name) { + ret = -ENOMEM; + break; + } + ret = setup_trace_event(tracer, event_name, filter); + free(event_name); + if (ret) + break; + } + + free(filter); + return ret; +} + +static void aolib_tracer_destroy(struct test_ftracer *tracer) +{ + check_free_events(tracer); + free_expected_events(); +} + +static bool aolib_tracer_expecting_more(void) +{ + size_t i; + + for (i = 0; i < exp_tps_nr; i++) + if (!exp_tps[i].matched) + return true; + return false; +} + +int setup_aolib_ftracer(void) +{ + struct test_ftracer *f; + + f = create_ftracer("aolib", aolib_tracer_process_event, + aolib_tracer_destroy, aolib_tracer_expecting_more, + DEFAULT_FTRACE_BUFFER_KB, DEFAULT_TRACER_LINES_ARR); + if (!f) + return -1; + + return setup_tcp_trace_events(f); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/ftrace.c b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c new file mode 100644 index 000000000000..e4d0b173bc94 --- /dev/null +++ b/tools/testing/selftests/net/tcp_ao/lib/ftrace.c @@ -0,0 +1,543 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <inttypes.h> +#include <pthread.h> +#include 
<stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <unistd.h> +#include "../../../../../include/linux/kernel.h" +#include "aolib.h" + +static char ftrace_path[] = "ksft-ftrace-XXXXXX"; +static bool ftrace_mounted; +uint64_t ns_cookie1, ns_cookie2; + +struct test_ftracer { + pthread_t tracer_thread; + int error; + char *instance_path; + FILE *trace_pipe; + + enum ftracer_op (*process_line)(const char *line); + void (*destructor)(struct test_ftracer *tracer); + bool (*expecting_more)(void); + + char **saved_lines; + size_t saved_lines_size; + size_t next_line_ind; + + pthread_cond_t met_all_expected; + pthread_mutex_t met_all_expected_lock; + + struct test_ftracer *next; +}; + +static struct test_ftracer *ftracers; +static pthread_mutex_t ftracers_lock = PTHREAD_MUTEX_INITIALIZER; + +static int mount_ftrace(void) +{ + if (!mkdtemp(ftrace_path)) + test_error("Can't create temp dir"); + + if (mount("tracefs", ftrace_path, "tracefs", 0, "rw")) + return -errno; + + ftrace_mounted = true; + + return 0; +} + +static void unmount_ftrace(void) +{ + if (ftrace_mounted && umount(ftrace_path)) + test_print("Failed on cleanup: can't unmount tracefs: %m"); + + if (rmdir(ftrace_path)) + test_error("Failed on cleanup: can't remove ftrace dir %s", + ftrace_path); +} + +struct opts_list_t { + char *opt_name; + struct opts_list_t *next; +}; + +static int disable_trace_options(const char *ftrace_path) +{ + struct opts_list_t *opts_list = NULL; + char *fopts, *line = NULL; + size_t buf_len = 0; + ssize_t line_len; + int ret = 0; + FILE *opts; + + fopts = test_sprintf("%s/%s", ftrace_path, "trace_options"); + if (!fopts) + return -ENOMEM; + + opts = fopen(fopts, "r+"); + if (!opts) { + ret = -errno; + goto out_free; + } + + while ((line_len = getline(&line, &buf_len, opts)) != -1) { + struct opts_list_t *tmp; + + if (!strncmp(line, "no", 2)) + continue; + + tmp = malloc(sizeof(*tmp)); + if (!tmp) { + ret = -ENOMEM; + goto 
out_free_opts_list; + } + tmp->next = opts_list; + tmp->opt_name = test_sprintf("no%s", line); + if (!tmp->opt_name) { + ret = -ENOMEM; + free(tmp); + goto out_free_opts_list; + } + opts_list = tmp; + } + + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + fseek(opts, 0, SEEK_SET); + fwrite(tmp->opt_name, 1, strlen(tmp->opt_name), opts); + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + +out_free_opts_list: + while (opts_list) { + struct opts_list_t *tmp = opts_list; + + opts_list = opts_list->next; + free(tmp->opt_name); + free(tmp); + } + free(line); + fclose(opts); +out_free: + free(fopts); + return ret; +} + +static int setup_buffer_size(const char *ftrace_path, size_t sz) +{ + char *fbuf_size = test_sprintf("%s/buffer_size_kb", ftrace_path); + int ret; + + if (!fbuf_size) + return -1; + + ret = test_echo(fbuf_size, 0, "%zu", sz); + free(fbuf_size); + return ret; +} + +static int setup_ftrace_instance(struct test_ftracer *tracer, const char *name) +{ + char *tmp; + + tmp = test_sprintf("%s/instances/ksft-%s-XXXXXX", ftrace_path, name); + if (!tmp) + return -ENOMEM; + + tracer->instance_path = mkdtemp(tmp); + if (!tracer->instance_path) { + free(tmp); + return -errno; + } + + return 0; +} + +static void remove_ftrace_instance(struct test_ftracer *tracer) +{ + if (rmdir(tracer->instance_path)) + test_print("Failed on cleanup: can't remove ftrace instance %s", + tracer->instance_path); + free(tracer->instance_path); +} + +static void tracer_cleanup(void *arg) +{ + struct test_ftracer *tracer = arg; + + fclose(tracer->trace_pipe); +} + +static void tracer_set_error(struct test_ftracer *tracer, int error) +{ + if (!tracer->error) + tracer->error = error; +} + +const size_t tracer_get_savedlines_nr(struct test_ftracer *tracer) +{ + return tracer->next_line_ind; +} + +const char **tracer_get_savedlines(struct test_ftracer *tracer) +{ + return (const char **)tracer->saved_lines; +} + +static void *tracer_thread_func(void *arg) +{ + 
struct test_ftracer *tracer = arg; + + pthread_cleanup_push(tracer_cleanup, arg); + + while (tracer->next_line_ind < tracer->saved_lines_size) { + char **lp = &tracer->saved_lines[tracer->next_line_ind]; + enum ftracer_op op; + size_t buf_len = 0; + ssize_t line_len; + + line_len = getline(lp, &buf_len, tracer->trace_pipe); + if (line_len == -1) + break; + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + op = tracer->process_line(*lp); + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + + if (tracer->expecting_more) { + pthread_mutex_lock(&tracer->met_all_expected_lock); + if (!tracer->expecting_more()) + pthread_cond_signal(&tracer->met_all_expected); + pthread_mutex_unlock(&tracer->met_all_expected_lock); + } + + if (op == FTRACER_LINE_DISCARD) + continue; + if (op == FTRACER_EXIT) + break; + if (op != FTRACER_LINE_PRESERVE) + test_error("unexpected tracer command %d", op); + + tracer->next_line_ind++; + buf_len = 0; + } + test_print("too many lines in ftracer buffer %zu, exiting tracer", + tracer->next_line_ind); + + pthread_cleanup_pop(1); + return NULL; +} + +static int setup_trace_thread(struct test_ftracer *tracer) +{ + int ret = 0; + char *path; + + path = test_sprintf("%s/trace_pipe", tracer->instance_path); + if (!path) + return -ENOMEM; + + tracer->trace_pipe = fopen(path, "r"); + if (!tracer->trace_pipe) { + ret = -errno; + goto out_free; + } + + if (pthread_create(&tracer->tracer_thread, NULL, + tracer_thread_func, (void *)tracer)) { + ret = -errno; + fclose(tracer->trace_pipe); + } + +out_free: + free(path); + return ret; +} + +static void stop_trace_thread(struct test_ftracer *tracer) +{ + void *res; + + if (pthread_cancel(tracer->tracer_thread)) { + test_print("Can't stop tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (pthread_join(tracer->tracer_thread, &res)) { + test_print("Can't join tracer pthread: %m"); + tracer_set_error(tracer, -errno); + } + if (res != PTHREAD_CANCELED) { + test_print("Tracer thread wasn't 
canceled"); + tracer_set_error(tracer, -errno); + } + if (tracer->error) + test_fail("tracer errored by %s", strerror(tracer->error)); +} + +static void final_wait_for_events(struct test_ftracer *tracer, + unsigned timeout_sec) +{ + struct timespec timeout; + struct timeval now; + int ret = 0; + + if (!tracer->expecting_more) + return; + + pthread_mutex_lock(&tracer->met_all_expected_lock); + gettimeofday(&now, NULL); + timeout.tv_sec = now.tv_sec + timeout_sec; + timeout.tv_nsec = now.tv_usec * 1000; + + while (tracer->expecting_more() && ret != ETIMEDOUT) + ret = pthread_cond_timedwait(&tracer->met_all_expected, + &tracer->met_all_expected_lock, &timeout); + pthread_mutex_unlock(&tracer->met_all_expected_lock); +} + +int setup_trace_event(struct test_ftracer *tracer, + const char *event, const char *filter) +{ + char *enable_path, *filter_path, *instance = tracer->instance_path; + int ret; + + enable_path = test_sprintf("%s/events/%s/enable", instance, event); + if (!enable_path) + return -ENOMEM; + + filter_path = test_sprintf("%s/events/%s/filter", instance, event); + if (!filter_path) { + ret = -ENOMEM; + goto out_free; + } + + ret = test_echo(filter_path, 0, "%s", filter); + if (!ret) + ret = test_echo(enable_path, 0, "1"); + +out_free: + free(filter_path); + free(enable_path); + return ret; +} + +struct test_ftracer *create_ftracer(const char *name, + enum ftracer_op (*process_line)(const char *line), + void (*destructor)(struct test_ftracer *tracer), + bool (*expecting_more)(void), + size_t lines_buf_sz, size_t buffer_size_kb) +{ + struct test_ftracer *tracer; + int err; + + /* XXX: separate __create_ftracer() helper and do here + * if (!kernel_config_has(KCONFIG_FTRACE)) + * return NULL; + */ + + tracer = malloc(sizeof(*tracer)); + if (!tracer) { + test_print("malloc()"); + return NULL; + } + + memset(tracer, 0, sizeof(*tracer)); + + err = setup_ftrace_instance(tracer, name); + if (err) { + test_print("setup_ftrace_instance(): %d", err); + goto err_free; + 
} + + err = disable_trace_options(tracer->instance_path); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + err = setup_buffer_size(tracer->instance_path, buffer_size_kb); + if (err) { + test_print("disable_trace_options(): %d", err); + goto err_remove; + } + + tracer->saved_lines = calloc(lines_buf_sz, sizeof(tracer->saved_lines[0])); + if (!tracer->saved_lines) { + test_print("calloc()"); + goto err_remove; + } + tracer->saved_lines_size = lines_buf_sz; + + tracer->process_line = process_line; + tracer->destructor = destructor; + tracer->expecting_more = expecting_more; + + err = pthread_cond_init(&tracer->met_all_expected, NULL); + if (err) { + test_print("pthread_cond_init(): %d", err); + goto err_free_lines; + } + + err = pthread_mutex_init(&tracer->met_all_expected_lock, NULL); + if (err) { + test_print("pthread_mutex_init(): %d", err); + goto err_cond_destroy; + } + + err = setup_trace_thread(tracer); + if (err) { + test_print("setup_trace_thread(): %d", err); + goto err_mutex_destroy; + } + + pthread_mutex_lock(&ftracers_lock); + tracer->next = ftracers; + ftracers = tracer; + pthread_mutex_unlock(&ftracers_lock); + + return tracer; + +err_mutex_destroy: + pthread_mutex_destroy(&tracer->met_all_expected_lock); +err_cond_destroy: + pthread_cond_destroy(&tracer->met_all_expected); +err_free_lines: + free(tracer->saved_lines); +err_remove: + remove_ftrace_instance(tracer); +err_free: + free(tracer); + return NULL; +} + +static void __destroy_ftracer(struct test_ftracer *tracer) +{ + size_t i; + + final_wait_for_events(tracer, TEST_TIMEOUT_SEC); + stop_trace_thread(tracer); + remove_ftrace_instance(tracer); + if (tracer->destructor) + tracer->destructor(tracer); + for (i = 0; i < tracer->saved_lines_size; i++) + free(tracer->saved_lines[i]); + pthread_cond_destroy(&tracer->met_all_expected); + pthread_mutex_destroy(&tracer->met_all_expected_lock); + free(tracer); +} + +void destroy_ftracer(struct test_ftracer *tracer) +{ 
+ pthread_mutex_lock(&ftracers_lock); + if (tracer == ftracers) { + ftracers = tracer->next; + } else { + struct test_ftracer *f = ftracers; + + while (f->next != tracer) { + if (!f->next) + test_error("tracers list corruption or double free %p", tracer); + f = f->next; + } + f->next = tracer->next; + } + tracer->next = NULL; + pthread_mutex_unlock(&ftracers_lock); + __destroy_ftracer(tracer); +} + +static void destroy_all_ftracers(void) +{ + struct test_ftracer *f; + + pthread_mutex_lock(&ftracers_lock); + f = ftracers; + ftracers = NULL; + pthread_mutex_unlock(&ftracers_lock); + + while (f) { + struct test_ftracer *n = f->next; + + f->next = NULL; + __destroy_ftracer(f); + f = n; + } +} + +static void test_unset_tracing(void) +{ + destroy_all_ftracers(); + unmount_ftrace(); +} + +int test_setup_tracing(void) +{ + /* + * Just a basic protection - this should be called only once from + * lib/kconfig. Not thread safe, which is fine as it's early, before + * threads are created. + */ + static int already_set; + int err; + + if (already_set) + return -1; + + /* Needs net-namespace cookies for filters */ + if (ns_cookie1 == ns_cookie2) { + test_print("net-namespace cookies: %" PRIu64 " == %" PRIu64 ", can't set up tracing", + ns_cookie1, ns_cookie2); + return -1; + } + + already_set = 1; + + test_add_destructor(test_unset_tracing); + + err = mount_ftrace(); + if (err) { + test_print("failed to mount_ftrace(): %d", err); + return err; + } + + return setup_aolib_ftracer(); +} + +static int get_ns_cookie(int nsfd, uint64_t *out) +{ + int old_ns = switch_save_ns(nsfd); + socklen_t size = sizeof(*out); + int sk; + + sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) { + test_print("socket(): %m"); + return -errno; + } + + if (getsockopt(sk, SOL_SOCKET, SO_NETNS_COOKIE, out, &size)) { + test_print("getsockopt(SO_NETNS_COOKIE): %m"); + close(sk); + return -errno; + } + + close(sk); + switch_close_ns(old_ns); + return 0; +} + +void test_init_ftrace(int nsfd1, int 
nsfd2) +{ + get_ns_cookie(nsfd1, &ns_cookie1); + get_ns_cookie(nsfd2, &ns_cookie2); + /* Populate kernel config state */ + kernel_config_has(KCONFIG_FTRACE); +} diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c index f279ffc3843b..9f1c175846f8 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c +++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c @@ -6,7 +6,7 @@ #include "aolib.h" struct kconfig_t { - int _errno; /* the returned error if not supported */ + int _error; /* negative errno if not supported */ int (*check_kconfig)(int *error); }; @@ -62,7 +62,7 @@ static int has_tcp_ao(int *err) memcpy(&tmp.addr, &addr, sizeof(addr)); *err = 0; if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT) ret = -errno; } @@ -87,7 +87,7 @@ static int has_tcp_md5(int *err) */ *err = 0; if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) { - *err = errno; + *err = -errno; if (errno != ENOPROTOOPT && errno == ENOMEM) { test_print("setsockopt(TCP_MD5SIG_EXT): %m"); ret = -errno; @@ -116,13 +116,21 @@ static int has_vrfs(int *err) return ret; } +static int has_ftrace(int *err) +{ + *err = test_setup_tracing(); + return 0; +} + +#define KCONFIG_UNKNOWN 1 static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER; static struct kconfig_t kconfig[__KCONFIG_LAST__] = { - { -1, has_net_ns }, - { -1, has_veth }, - { -1, has_tcp_ao }, - { -1, has_tcp_md5 }, - { -1, has_vrfs }, + { KCONFIG_UNKNOWN, has_net_ns }, + { KCONFIG_UNKNOWN, has_veth }, + { KCONFIG_UNKNOWN, has_tcp_ao }, + { KCONFIG_UNKNOWN, has_tcp_md5 }, + { KCONFIG_UNKNOWN, has_vrfs }, + { KCONFIG_UNKNOWN, has_ftrace }, }; const char *tests_skip_reason[__KCONFIG_LAST__] = { @@ -131,6 +139,7 @@ const char *tests_skip_reason[__KCONFIG_LAST__] = { "Tests require TCP-AO support (CONFIG_TCP_AO)", "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)", "VRFs are 
not supported (CONFIG_NET_VRF)", + "Ftrace points are not supported (CONFIG_TRACEPOINTS)", }; bool kernel_config_has(enum test_needs_kconfig k) @@ -138,11 +147,11 @@ bool kernel_config_has(enum test_needs_kconfig k) bool ret; pthread_mutex_lock(&kconfig_lock); - if (kconfig[k]._errno == -1) { - if (kconfig[k].check_kconfig(&kconfig[k]._errno)) + if (kconfig[k]._error == KCONFIG_UNKNOWN) { + if (kconfig[k].check_kconfig(&kconfig[k]._error)) test_error("Failed to initialize kconfig %u", k); } - ret = kconfig[k]._errno == 0; + ret = kconfig[k]._error == 0; pthread_mutex_unlock(&kconfig_lock); return ret; } diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c index e408b9243b2c..49aec2922a31 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/setup.c +++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c @@ -9,7 +9,7 @@ * Can't be included in the header: it defines static variables which * will be unique to every object. Let's include it only once here. 
*/ -#include "../../../kselftest.h" +#include "kselftest.h" /* Prevent overriding of one thread's output by another */ static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER; @@ -111,7 +111,7 @@ static void sig_int(int signo) int open_netns(void) { - const char *netns_path = "/proc/self/ns/net"; + const char *netns_path = "/proc/thread-self/ns/net"; int fd; fd = open(netns_path, O_RDONLY); @@ -142,6 +142,13 @@ int switch_save_ns(int new_ns) return ret; } +void switch_close_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) + test_error("setns()"); + close(fd); +} + static int nsfd_outside = -1; static int nsfd_parent = -1; static int nsfd_child = -1; @@ -243,9 +250,9 @@ void __test_init(unsigned int ntests, int family, unsigned int prefix, test_print("rand seed %u", (unsigned int)seed); srand(seed); - ksft_print_header(); init_namespaces(); + test_init_ftrace(nsfd_parent, nsfd_child); if (add_veth(veth_name, nsfd_parent, nsfd_child)) test_error("Failed to add veth"); @@ -296,7 +303,7 @@ static bool is_optmem_namespaced(void) int old_ns = switch_save_ns(nsfd_child); optmem_ns = !access(optmem_file, F_OK); - switch_ns(old_ns); + switch_close_ns(old_ns); } return !!optmem_ns; } @@ -317,7 +324,7 @@ size_t test_get_optmem(void) test_error("can't read from %s", optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); return ret; } @@ -339,7 +346,7 @@ static void __test_set_optmem(size_t new, size_t *old) test_error("can't write %zu to %s", new, optmem_file); fclose(foptmem); if (!is_optmem_namespaced()) - switch_ns(old_ns); + switch_close_ns(old_ns); } static void test_revert_optmem(void) diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c index 15aeb0963058..ef8e9031d47a 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/sock.c +++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c @@ -34,10 +34,8 @@ int __test_listen_socket(int backlog, void *addr, size_t 
addr_sz) return sk; } -int test_wait_fd(int sk, time_t sec, bool write) +static int __test_wait_fd(int sk, struct timeval *tv, bool write) { - struct timeval tv = { .tv_sec = sec }; - struct timeval *ptv = NULL; fd_set fds, efds; int ret; socklen_t slen = sizeof(ret); @@ -47,14 +45,11 @@ int test_wait_fd(int sk, time_t sec, bool write) FD_ZERO(&efds); FD_SET(sk, &efds); - if (sec) - ptv = &tv; - errno = 0; if (write) - ret = select(sk + 1, NULL, &fds, &efds, ptv); + ret = select(sk + 1, NULL, &fds, &efds, tv); else - ret = select(sk + 1, &fds, NULL, &efds, ptv); + ret = select(sk + 1, &fds, NULL, &efds, tv); if (ret < 0) return -errno; if (ret == 0) { @@ -69,8 +64,54 @@ int test_wait_fd(int sk, time_t sec, bool write) return 0; } +int test_wait_fd(int sk, time_t sec, bool write) +{ + struct timeval tv = { .tv_sec = sec, }; + + return __test_wait_fd(sk, sec ? &tv : NULL, write); +} + +static bool __skpair_poll_should_stop(int sk, struct tcp_counters *c, + test_cnt condition) +{ + struct tcp_counters c2; + test_cnt diff; + + if (test_get_tcp_counters(sk, &c2)) + test_error("test_get_tcp_counters()"); + + diff = test_cmp_counters(c, &c2); + test_tcp_counters_free(&c2); + return (diff & condition) == condition; +} + +/* How often wake up and check netns counters & paired (*err) */ +#define POLL_USEC 150 +static int __test_skpair_poll(int sk, bool write, uint64_t timeout, + struct tcp_counters *c, test_cnt cond, + volatile int *err) +{ + uint64_t t; + + for (t = 0; t <= timeout * 1000000; t += POLL_USEC) { + struct timeval tv = { .tv_usec = POLL_USEC, }; + int ret; + + ret = __test_wait_fd(sk, &tv, write); + if (ret != -ETIMEDOUT) + return ret; + if (c && cond && __skpair_poll_should_stop(sk, c, cond)) + break; + if (err && *err) + return *err; + } + if (err) + *err = -ETIMEDOUT; + return -ETIMEDOUT; +} + int __test_connect_socket(int sk, const char *device, - void *addr, size_t addr_sz, time_t timeout) + void *addr, size_t addr_sz, bool async) { long flags; int err; @@ 
-82,15 +123,6 @@ int __test_connect_socket(int sk, const char *device, test_error("setsockopt(SO_BINDTODEVICE, %s)", device); } - if (!timeout) { - err = connect(sk, addr, addr_sz); - if (err) { - err = -errno; - goto out; - } - return 0; - } - flags = fcntl(sk, F_GETFL); if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0)) test_error("fcntl()"); @@ -100,9 +132,9 @@ int __test_connect_socket(int sk, const char *device, err = -errno; goto out; } - if (timeout < 0) + if (async) return sk; - err = test_wait_fd(sk, timeout, 1); + err = test_wait_fd(sk, TEST_TIMEOUT_SEC, 1); if (err) goto out; } @@ -113,6 +145,45 @@ out: return err; } +int test_skpair_wait_poll(int sk, bool write, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = __test_skpair_poll(sk, write, TEST_TIMEOUT_SEC, &c, cond, err); + test_tcp_counters_free(&c); + return ret; +} + +int _test_skpair_connect_poll(int sk, const char *device, + void *addr, size_t addr_sz, + test_cnt condition, volatile int *err) +{ + struct tcp_counters c; + int ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + ret = __test_connect_socket(sk, device, addr, addr_sz, true); + if (ret < 0) { + test_tcp_counters_free(&c); + return (*err = ret); + } + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, &c, condition, err); + if (ret < 0) + close(sk); + test_tcp_counters_free(&c); + return ret; +} + int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix, int vrf, const char *password) { @@ -333,12 +404,12 @@ do { \ return 0; } -int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) +int test_get_tcp_counters(int sk, struct tcp_counters *out) { struct tcp_ao_getsockopt *key_dump; socklen_t key_dump_sz = 
sizeof(*key_dump); struct tcp_ao_info_opt info = {}; - bool c1, c2, c3, c4, c5; + bool c1, c2, c3, c4, c5, c6, c7, c8; struct netstat *ns; int err, nr_keys; @@ -346,25 +417,30 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) /* per-netns */ ns = netstat_read(); - out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); - out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); - out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); - out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); - out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->ao.netns_ao_good = netstat_get(ns, "TCPAOGood", &c1); + out->ao.netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2); + out->ao.netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3); + out->ao.netns_ao_required = netstat_get(ns, "TCPAORequired", &c4); + out->ao.netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5); + out->netns_md5_notfound = netstat_get(ns, "TCPMD5NotFound", &c6); + out->netns_md5_unexpected = netstat_get(ns, "TCPMD5Unexpected", &c7); + out->netns_md5_failure = netstat_get(ns, "TCPMD5Failure", &c8); netstat_free(ns); - if (c1 || c2 || c3 || c4 || c5) + if (c1 || c2 || c3 || c4 || c5 || c6 || c7 || c8) return -EOPNOTSUPP; err = test_get_ao_info(sk, &info); + if (err == -ENOENT) + return 0; if (err) return err; /* per-socket */ - out->ao_info_pkt_good = info.pkt_good; - out->ao_info_pkt_bad = info.pkt_bad; - out->ao_info_pkt_key_not_found = info.pkt_key_not_found; - out->ao_info_pkt_ao_required = info.pkt_ao_required; - out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; + out->ao.ao_info_pkt_good = info.pkt_good; + out->ao.ao_info_pkt_bad = info.pkt_bad; + out->ao.ao_info_pkt_key_not_found = info.pkt_key_not_found; + out->ao.ao_info_pkt_ao_required = info.pkt_ao_required; + out->ao.ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp; /* per-key */ nr_keys = test_get_ao_keys_nr(sk); @@ -372,14 +448,13 @@ int 
test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return nr_keys; if (nr_keys == 0) test_error("test_get_ao_keys_nr() == 0"); - out->nr_keys = (size_t)nr_keys; + out->ao.nr_keys = (size_t)nr_keys; key_dump = calloc(nr_keys, key_dump_sz); if (!key_dump) return -errno; key_dump[0].nkeys = nr_keys; key_dump[0].get_all = 1; - key_dump[0].get_all = 1; err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, key_dump, &key_dump_sz); if (err) { @@ -387,72 +462,84 @@ int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out) return -errno; } - out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0])); - if (!out->key_cnts) { + out->ao.key_cnts = calloc(nr_keys, sizeof(out->ao.key_cnts[0])); + if (!out->ao.key_cnts) { free(key_dump); return -errno; } while (nr_keys--) { - out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; - out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; - out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; - out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; + out->ao.key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid; + out->ao.key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid; + out->ao.key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good; + out->ao.key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad; } free(key_dump); return 0; } -int __test_tcp_ao_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected) +test_cnt test_cmp_counters(struct tcp_counters *before, + struct tcp_counters *after) { -#define __cmp_ao(cnt, expecting_inc) \ +#define __cmp(cnt, e_cnt) \ +do { \ + if (before->cnt > after->cnt) \ + test_error("counter " __stringify(cnt) " decreased"); \ + if (before->cnt != after->cnt) \ + ret |= e_cnt; \ +} while (0) + + test_cnt ret = 0; + size_t i; + + if (before->ao.nr_keys != after->ao.nr_keys) + test_error("the number of keys has changed"); + + _for_each_counter(__cmp); + + i = before->ao.nr_keys; + while (i--) { + 
__cmp(ao.key_cnts[i].pkt_good, TEST_CNT_KEY_GOOD); + __cmp(ao.key_cnts[i].pkt_bad, TEST_CNT_KEY_BAD); + } +#undef __cmp + return ret; +} + +int test_assert_counters_sk(const char *tst_name, + struct tcp_counters *before, + struct tcp_counters *after, + test_cnt expected) +{ +#define __cmp_ao(cnt, e_cnt) \ do { \ if (before->cnt > after->cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \ - tst_name ?: "", before->cnt, after->cnt); \ + tst_name ?: "", before->cnt, after->cnt); \ return -1; \ } \ - if ((before->cnt != after->cnt) != (expecting_inc)) { \ + if ((before->cnt != after->cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \ - tst_name ?: "", (expecting_inc) ? "" : "not ", \ + tst_name ?: "", (expected & e_cnt) ? "" : "not ", \ before->cnt, after->cnt); \ return -1; \ } \ -} while(0) +} while (0) errno = 0; - /* per-netns */ - __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD)); - __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD)); - __cmp_ao(netns_ao_key_not_found, - !!(expected & TEST_CNT_NS_KEY_NOT_FOUND)); - __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED)); - __cmp_ao(netns_ao_dropped_icmp, - !!(expected & TEST_CNT_NS_DROPPED_ICMP)); - /* per-socket */ - __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD)); - __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD)); - __cmp_ao(ao_info_pkt_key_not_found, - !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND)); - __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED)); - __cmp_ao(ao_info_pkt_dropped_icmp, - !!(expected & TEST_CNT_SOCK_DROPPED_ICMP)); + _for_each_counter(__cmp_ao); return 0; #undef __cmp_ao } -int test_tcp_ao_key_counters_cmp(const char *tst_name, - struct tcp_ao_counters *before, - struct tcp_ao_counters *after, - test_cnt expected, - int sndid, int rcvid) +int test_assert_counters_key(const char *tst_name, + struct 
tcp_ao_counters *before, + struct tcp_ao_counters *after, + test_cnt expected, int sndid, int rcvid) { size_t i; -#define __cmp_ao(i, cnt, expecting_inc) \ +#define __cmp_ao(i, cnt, e_cnt) \ do { \ if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \ @@ -462,16 +549,16 @@ do { \ before->key_cnts[i].rcvid); \ return -1; \ } \ - if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \ + if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != !!(expected & e_cnt)) { \ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \ - tst_name ?: "", (expecting_inc) ? "" : "not ",\ + tst_name ?: "", (expected & e_cnt) ? "" : "not ",\ before->key_cnts[i].cnt, \ after->key_cnts[i].cnt, \ before->key_cnts[i].sndid, \ before->key_cnts[i].rcvid); \ return -1; \ } \ -} while(0) +} while (0) if (before->nr_keys != after->nr_keys) { test_fail("%s: Keys changed on the socket %zu != %zu", @@ -486,20 +573,22 @@ do { \ continue; if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid) continue; - __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD)); - __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD)); + __cmp_ao(i, pkt_good, TEST_CNT_KEY_GOOD); + __cmp_ao(i, pkt_bad, TEST_CNT_KEY_BAD); } return 0; #undef __cmp_ao } -void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts) +void test_tcp_counters_free(struct tcp_counters *cnts) { - free(cnts->key_cnts); + free(cnts->ao.key_cnts); } #define TEST_BUF_SIZE 4096 -ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +static ssize_t _test_server_run(int sk, ssize_t quota, struct tcp_counters *c, + test_cnt cond, volatile int *err, + time_t timeout_sec) { ssize_t total = 0; @@ -508,7 +597,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) ssize_t bytes, sent; int ret; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = 
__test_skpair_poll(sk, 0, timeout_sec, c, cond, err); if (ret) return ret; @@ -519,7 +608,7 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) if (bytes == 0) break; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, timeout_sec, c, cond, err); if (ret) return ret; @@ -534,13 +623,41 @@ ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) return total; } -ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, - const size_t msg_len, time_t timeout_sec) +ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec) +{ + return _test_server_run(sk, quota, NULL, 0, NULL, + timeout_sec ?: TEST_TIMEOUT_SEC); +} + +int test_skpair_server(int sk, ssize_t quota, test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = _test_server_run(sk, quota, &c, cond, err, TEST_TIMEOUT_SEC); + test_tcp_counters_free(&c); + return ret; +} + +static ssize_t test_client_loop(int sk, size_t buf_sz, const size_t msg_len, + struct tcp_counters *c, test_cnt cond, + volatile int *err) { char msg[msg_len]; int nodelay = 1; + char *buf; size_t i; + buf = alloca(buf_sz); + if (!buf) + return -ENOMEM; + randomize_buffer(buf, buf_sz); + if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay))) test_error("setsockopt(TCP_NODELAY)"); @@ -548,7 +665,7 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, size_t sent, bytes = min(msg_len, buf_sz - i); int ret; - ret = test_wait_fd(sk, timeout_sec, 1); + ret = __test_skpair_poll(sk, 1, TEST_TIMEOUT_SEC, c, cond, err); if (ret) return ret; @@ -562,7 +679,8 @@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, do { ssize_t got; - ret = test_wait_fd(sk, timeout_sec, 0); + ret = __test_skpair_poll(sk, 0, TEST_TIMEOUT_SEC, + c, cond, err); if (ret) return ret; @@ -581,15 +699,31 
@@ ssize_t test_client_loop(int sk, char *buf, size_t buf_sz, return i; } -int test_client_verify(int sk, const size_t msg_len, const size_t nr, - time_t timeout_sec) +int test_client_verify(int sk, const size_t msg_len, const size_t nr) { size_t buf_sz = msg_len * nr; - char *buf = alloca(buf_sz); ssize_t ret; - randomize_buffer(buf, buf_sz); - ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec); + ret = test_client_loop(sk, buf_sz, msg_len, NULL, 0, NULL); + if (ret < 0) + return (int)ret; + return ret != buf_sz ? -1 : 0; +} + +int test_skpair_client(int sk, const size_t msg_len, const size_t nr, + test_cnt cond, volatile int *err) +{ + struct tcp_counters c; + size_t buf_sz = msg_len * nr; + ssize_t ret; + + *err = 0; + if (test_get_tcp_counters(sk, &c)) + test_error("test_get_tcp_counters()"); + synchronize_threads(); /* 1: init skpair & read nscounters */ + + ret = test_client_loop(sk, buf_sz, msg_len, &c, cond, err); + test_tcp_counters_free(&c); if (ret < 0) return (int)ret; return ret != buf_sz ? -1 : 0; diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c index 372daca525f5..bdf5522c9213 100644 --- a/tools/testing/selftests/net/tcp_ao/lib/utils.c +++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c @@ -21,6 +21,32 @@ void randomize_buffer(void *buf, size_t buflen) } } +__printf(3, 4) int test_echo(const char *fname, bool append, + const char *fmt, ...) +{ + size_t len, written; + va_list vargs; + char *msg; + FILE *f; + + f = fopen(fname, append ? "a" : "w"); + if (!f) + return -errno; + + va_start(vargs, fmt); + msg = test_snprintf(fmt, vargs); + va_end(vargs); + if (!msg) { + fclose(f); + return -1; + } + len = strlen(msg); + written = fwrite(msg, 1, len, f); + fclose(f); + free(msg); + return written == len ? 
0 : -1; +} + const struct sockaddr_in6 addr_any6 = { .sin6_family = AF_INET6, }; diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c index 8fdc808df325..9a059b6c4523 100644 --- a/tools/testing/selftests/net/tcp_ao/restore.c +++ b/tools/testing/selftests/net/tcp_ao/restore.c @@ -16,11 +16,11 @@ const size_t quota = nr_packets * msg_len; static void try_server_run(const char *tst_name, unsigned int port, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - int sk, lsk; - time_t timeout; + int sk, lsk, dummy; ssize_t bytes; if (fault(TIMEOUT)) @@ -48,11 +48,10 @@ static void try_server_run(const char *tst_name, unsigned int port, } before_cnt = netstat_get_one(cnt_name, NULL); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); - timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - bytes = test_server_run(sk, quota, timeout); + bytes = test_skpair_server(sk, quota, poll_cnt, &dummy); if (fault(TIMEOUT)) { if (bytes > 0) test_fail("%s: server served: %zd", tst_name, bytes); @@ -64,17 +63,18 @@ static void try_server_run(const char *tst_name, unsigned int port, else test_ok("%s: server alive", tst_name); } - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 3: counters checks */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", - tst_name, cnt_name, after_cnt, before_cnt); + test_fail("%s(server): %s counter did not increase: %" PRIu64 " <= %" PRIu64, + tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s(server): counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } @@ -82,7 +82,7 @@ static void try_server_run(const char *tst_name, unsigned int port, * Before close() as that will send FIN and move the peer in TCP_CLOSE * and that will prevent reading AO counters from the peer's socket. 
*/ - synchronize_threads(); /* 3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ out: close(sk); } @@ -91,16 +91,16 @@ static void *server_fn(void *arg) { unsigned int port = test_server_port; - try_server_run("TCP-AO migrate to another socket", port++, + try_server_run("TCP-AO migrate to another socket (server)", port++, 0, TEST_CNT_GOOD); - try_server_run("TCP-AO with wrong send ISN", port++, + try_server_run("TCP-AO with wrong send ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive ISN", port++, + try_server_run("TCP-AO with wrong receive ISN (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong send SEQ ext number", port++, + try_server_run("TCP-AO with wrong send SEQ ext number (server)", port++, FAULT_TIMEOUT, TEST_CNT_BAD); - try_server_run("TCP-AO with wrong receive SEQ ext number", port++, - FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); + try_server_run("TCP-AO with wrong receive SEQ ext number (server)", + port++, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); synchronize_threads(); /* don't race to exit: client exits */ return NULL; @@ -124,7 +124,7 @@ static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr, test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("pre-migrate verify failed"); test_enable_repair(sk); @@ -138,11 +138,11 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, struct tcp_ao_repair *ao_img, fault_t inj, test_cnt cnt_expected) { + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 
0 : cnt_expected; const char *cnt_name = "TCPAOGood"; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt, after_cnt; - time_t timeout; - int sk; + int sk, dummy; if (fault(TIMEOUT)) cnt_name = "TCPAOBad"; @@ -158,38 +158,39 @@ static void test_sk_restore(const char *tst_name, unsigned int server_port, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - if (test_client_verify(sk, msg_len, nr_packets, timeout)) { + if (test_skpair_client(sk, msg_len, nr_packets, poll_cnt, &dummy)) { if (fault(TIMEOUT)) test_ok("%s: post-migrate connection is broken", tst_name); else test_fail("%s: post-migrate connection is working", tst_name); } else { if (fault(TIMEOUT)) - test_fail("%s: post-migrate connection still working", tst_name); + test_fail("%s: post-migrate connection is working", tst_name); else test_ok("%s: post-migrate connection is alive", tst_name); } - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + + synchronize_threads(); /* 3: counters checks */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_cnt = netstat_get_one(cnt_name, NULL); - test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } - synchronize_threads(); /* 
3: verified => closed */ + synchronize_threads(); /* 4: verified => closed */ close(sk); } @@ -201,29 +202,43 @@ static void *client_fn(void *arg) sockaddr_af saddr; test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); - test_sk_restore("TCP-AO migrate to another socket", port++, + test_sk_restore("TCP-AO migrate to another socket (client)", port++, &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snt_isn += 1; - test_sk_restore("TCP-AO with wrong send ISN", port++, + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); + test_sk_restore("TCP-AO with wrong send ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.rcv_isn += 1; - test_sk_restore("TCP-AO with wrong receive ISN", port++, + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); + test_sk_restore("TCP-AO with wrong receive ISN (client)", port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); ao_img.snd_sne += 1; - test_sk_restore("TCP-AO with wrong send SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_addr, this_ip_dest, + -1, port, 0, -1, -1, -1, -1, -1, 100, 100, -1); + /* not expecting server => client mismatches as only snd sne is broken */ + test_sk_restore("TCP-AO with wrong send SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD); test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img); 
ao_img.rcv_sne += 1; - test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++, - &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, + /* not expecting client => server mismatches as only rcv sne is broken */ + trace_ao_event_expect(TCP_AO_MISMATCH, this_ip_dest, this_ip_addr, + port, -1, 0, -1, -1, -1, -1, -1, 100, 100, -1); + test_sk_restore("TCP-AO with wrong receive SEQ ext number (client)", + port++, &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_NS_GOOD | TEST_CNT_BAD); return NULL; @@ -231,6 +246,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(20, server_fn, client_fn); + test_init(21, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c index a2fe88d35ac0..883cddf377cf 100644 --- a/tools/testing/selftests/net/tcp_ao/rst.c +++ b/tools/testing/selftests/net/tcp_ao/rst.c @@ -84,15 +84,15 @@ static void close_forced(int sk) static void test_server_active_rst(unsigned int port) { - struct tcp_ao_counters cnt1, cnt2; + struct tcp_counters cnt1, cnt2; ssize_t bytes; int sk, lsk; lsk = test_listen_socket(this_ip_addr, port, backlog); if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100)) test_error("setsockopt(TCP_AO_ADD_KEY)"); - if (test_get_tcp_ao_counters(lsk, &cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 1: MKT added */ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0)) @@ -103,8 +103,8 @@ static void test_server_active_rst(unsigned int port) test_error("accept()"); synchronize_threads(); /* 2: connection accept()ed, another queued */ - if (test_get_tcp_ao_counters(lsk, &cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 3: close listen socket */ close(lsk); @@ -120,7 +120,7 @@ static void 
test_server_active_rst(unsigned int port) synchronize_threads(); /* 5: closed active sk */ synchronize_threads(); /* 6: counters checks */ - if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) + if (test_assert_counters("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD)) test_fail("MKT counters (server) have not only good packets"); else test_ok("MKT counters are good on server"); @@ -128,7 +128,7 @@ static void test_server_active_rst(unsigned int port) static void test_server_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; int sk, lsk; ssize_t bytes; @@ -147,8 +147,8 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 2: accepted => send data */ close(lsk); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { @@ -160,12 +160,12 @@ static void test_server_passive_rst(unsigned int port) synchronize_threads(); /* 3: checkpoint the client */ synchronize_threads(); /* 4: close the server, creating twsk */ - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); close(sk); synchronize_threads(); /* 5: restore the socket, send more data */ - test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("passive RST server", &cnt1, &cnt2, TEST_CNT_GOOD); synchronize_threads(); /* 6: server exits */ } @@ -271,8 +271,7 @@ static void test_client_active_rst(unsigned int port) synchronize_threads(); /* 1: MKT added */ for (i = 0; i < last; i++) { - err = _test_connect_socket(sk[i], this_ip_dest, port, - (i == 0) ? 
TEST_TIMEOUT_SEC : -1); + err = _test_connect_socket(sk[i], this_ip_dest, port, i != 0); if (err < 0) test_error("failed to connect()"); } @@ -283,12 +282,12 @@ static void test_client_active_rst(unsigned int port) test_error("test_wait_fds(): %d", err); /* async connect() with third sk to get into request_sock_queue */ - err = _test_connect_socket(sk[last], this_ip_dest, port, -1); + err = _test_connect_socket(sk[last], this_ip_dest, port, 1); if (err < 0) test_error("failed to connect()"); synchronize_threads(); /* 3: close listen socket */ - if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk[0], packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -323,7 +322,7 @@ static void test_client_active_rst(unsigned int port) static void test_client_passive_rst(unsigned int port) { - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_ao_repair ao_img; struct tcp_sock_state img; sockaddr_af saddr; @@ -341,7 +340,7 @@ static void test_client_passive_rst(unsigned int port) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC)) + if (test_client_verify(sk, packet_sz, quota / packet_sz)) test_fail("Failed to send data on connected socket"); else test_ok("Verified established tcp connection"); @@ -397,8 +396,8 @@ static void test_client_passive_rst(unsigned int port) test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, &ao_img); - if (test_get_tcp_ao_counters(sk, &ao1)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt1)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(&img); @@ -417,7 +416,7 @@ static void test_client_passive_rst(unsigned int port) * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0, 
* options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0 */ - err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC); + err = test_client_verify(sk, packet_sz, quota / packet_sz); /* Make sure that the connection was reset, not timeouted */ if (err && err == -ECONNRESET) test_ok("client sock was passively reset post-seq-adjust"); @@ -426,12 +425,12 @@ static void test_client_passive_rst(unsigned int port) else test_fail("client sock is yet connected post-seq-adjust"); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* 6: server exits */ close(sk); - test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters("client passive RST", &cnt1, &cnt2, TEST_CNT_GOOD); } static void *client_fn(void *arg) @@ -455,6 +454,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(14, server_fn, client_fn); + test_init(15, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c index a5698b0a3718..2c73bea698a6 100644 --- a/tools/testing/selftests/net/tcp_ao/self-connect.c +++ b/tools/testing/selftests/net/tcp_ao/self-connect.c @@ -16,6 +16,9 @@ static void __setup_lo_intf(const char *lo_intf, if (link_set_up(lo_intf)) test_error("Failed to bring %s up", lo_intf); + + if (ip_route_add(lo_intf, TEST_FAMILY, local_addr, local_addr)) + test_error("Failed to add a local route %s", lo_intf); } static void setup_lo_intf(const char *lo_intf) @@ -30,7 +33,7 @@ static void setup_lo_intf(const char *lo_intf) static void tcp_self_connect(const char *tst, unsigned int port, bool different_keyids, bool check_restore) { - struct tcp_ao_counters before_ao, after_ao; + struct tcp_counters before, after; uint64_t before_aogood, after_aogood; struct 
netstat *ns_before, *ns_after; const size_t nr_packets = 20; @@ -60,17 +63,17 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_before = netstat_read(); before_aogood = netstat_get(ns_before, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &before_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &before)) + test_error("test_get_tcp_counters()"); if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr, - sizeof(addr), TEST_TIMEOUT_SEC) < 0) { + sizeof(addr), 0) < 0) { ns_after = netstat_read(); netstat_print_diff(ns_before, ns_after); test_error("failed to connect()"); } - if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; @@ -78,8 +81,8 @@ static void tcp_self_connect(const char *tst, unsigned int port, ns_after = netstat_read(); after_aogood = netstat_get(ns_after, "TCPAOGood", NULL); - if (test_get_tcp_ao_counters(sk, &after_ao)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, &after)) + test_error("test_get_tcp_counters()"); if (!check_restore) { /* to debug: netstat_print_diff(ns_before, ns_after); */ netstat_free(ns_before); @@ -87,13 +90,13 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); close(sk); return; } - if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) { + if (test_assert_counters(tst, &before, &after, TEST_CNT_GOOD)) { close(sk); return; } @@ -136,7 +139,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, test_ao_restore(sk, &ao_img); test_disable_repair(sk); test_sock_state_free(&img); - if (test_client_verify(sk, 100, nr_packets, 
TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, 100, nr_packets)) { test_fail("%s: tcp connection verify failed", tst); close(sk); return; @@ -148,7 +151,7 @@ static void tcp_self_connect(const char *tst, unsigned int port, netstat_free(ns_after); close(sk); if (after_aogood <= before_aogood) { - test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu", + test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood); return; } @@ -163,17 +166,26 @@ static void *client_fn(void *arg) setup_lo_intf("lo"); tcp_self_connect("self-connect(same keyids)", port++, false, false); + + /* expecting rnext to change based on the first segment RNext != Current */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); tcp_self_connect("self-connect(different keyids)", port++, true, false); tcp_self_connect("self-connect(restore)", port, false, true); - port += 2; + port += 2; /* restore test restores over different port */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port, port, 0, -1, -1, -1, -1, -1, 7, 5, -1); + /* intentionally on restore they are added to the socket in different order */ + trace_ao_event_expect(TCP_AO_RNEXT_REQUEST, local_addr, local_addr, + port + 1, port + 1, 0, -1, -1, -1, -1, -1, 5, 7, -1); tcp_self_connect("self-connect(restore, different keyids)", port, true, true); - port += 2; + port += 2; /* restore test restores over different port */ return NULL; } int main(int argc, char *argv[]) { - test_init(4, client_fn, NULL); + test_init(5, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c index ad4e77d6823e..6478da6a71c3 100644 --- a/tools/testing/selftests/net/tcp_ao/seq-ext.c +++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Check that after SEQ number wrap-around: * 1. 
SEQ-extension has upper bytes set - * 2. TCP conneciton is alive and no TCPAOBad segments + * 2. TCP connection is alive and no TCPAOBad segments * In order to test (2), the test doesn't just adjust seq number for a queue * on a connected socket, but migrates it to another sk+port number, so * that there won't be any delayed packets that will fail to verify @@ -40,7 +40,7 @@ static void test_adjust_seqs(struct tcp_sock_state *img, static int test_sk_restore(struct tcp_sock_state *img, struct tcp_ao_repair *ao_img, sockaddr_af *saddr, const union tcp_addr daddr, unsigned int dport, - struct tcp_ao_counters *cnt) + struct tcp_counters *cnt) { int sk; @@ -54,8 +54,8 @@ static int test_sk_restore(struct tcp_sock_state *img, test_error("setsockopt(TCP_AO_ADD_KEY)"); test_ao_restore(sk, ao_img); - if (test_get_tcp_ao_counters(sk, cnt)) - test_error("test_get_tcp_ao_counters()"); + if (test_get_tcp_counters(sk, cnt)) + test_error("test_get_tcp_counters()"); test_disable_repair(sk); test_sock_state_free(img); @@ -65,7 +65,7 @@ static int test_sk_restore(struct tcp_sock_state *img, static void *server_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -114,9 +114,17 @@ static void *server_fn(void *arg) test_adjust_seqs(&img, &ao_img, true); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - client_new_port, &ao1); - - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ + client_new_port, &cnt1); + + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_SND_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_addr, + this_ip_dest, test_server_port + 1, 
client_new_port, 1); + trace_ao_event_sne_expect(TCP_AO_RCV_SNE_UPDATE, this_ip_dest, + this_ip_addr, client_new_port, test_server_port + 1, 1); + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC); if (bytes != quota) { if (bytes > 0) @@ -127,22 +135,23 @@ static void *server_fn(void *arg) test_ok("server alive"); } - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 6: verify counters after SEQ-number rollover */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); test_enable_repair(sk); @@ -164,7 +173,7 @@ out: static void *client_fn(void *arg) { uint64_t before_good, after_good, after_bad; - struct tcp_ao_counters ao1, ao2; + struct tcp_counters cnt1, cnt2; struct tcp_sock_state img; struct tcp_ao_repair ao_img; sockaddr_af saddr; @@ -182,7 +191,7 @@ static void *client_fn(void *arg) test_error("failed to connect()"); synchronize_threads(); /* 2: accepted => send data */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) { + if (test_client_verify(sk, msg_len, nr_packets)) { test_fail("pre-migrate verify failed"); return NULL; } 
@@ -204,30 +213,31 @@ static void *client_fn(void *arg) test_adjust_seqs(&img, &ao_img, false); synchronize_threads(); /* 4: dump finished */ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest, - test_server_port + 1, &ao1); + test_server_port + 1, &cnt1); - synchronize_threads(); /* 5: verify counters during SEQ-number rollover */ - if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) + synchronize_threads(); /* 5: verify the connection during SEQ-number rollover */ + if (test_client_verify(sk, msg_len, nr_packets)) test_fail("post-migrate verify failed"); else test_ok("post-migrate connection alive"); - if (test_get_tcp_ao_counters(sk, &ao2)) - test_error("test_get_tcp_ao_counters()"); + synchronize_threads(); /* 5: verify counters after SEQ-number rollover */ + if (test_get_tcp_counters(sk, &cnt2)) + test_error("test_get_tcp_counters()"); after_good = netstat_get_one("TCPAOGood", NULL); - test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD); + test_assert_counters(NULL, &cnt1, &cnt2, TEST_CNT_GOOD); if (after_good <= before_good) { - test_fail("TCPAOGood counter did not increase: %zu <= %zu", + test_fail("TCPAOGood counter did not increase: %" PRIu64 " <= %" PRIu64, after_good, before_good); } else { - test_ok("TCPAOGood counter increased %zu => %zu", + test_ok("TCPAOGood counter increased %" PRIu64 " => %" PRIu64, before_good, after_good); } after_bad = netstat_get_one("TCPAOBad", NULL); if (after_bad) - test_fail("TCPAOBad counter is non-zero: %zu", after_bad); + test_fail("TCPAOBad counter is non-zero: %" PRIu64, after_bad); else test_ok("TCPAOBad counter didn't increase"); @@ -240,6 +250,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(7, server_fn, client_fn); + test_init(8, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c index 517930f9721b..0abb9807d742 100644 --- 
a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c +++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c @@ -6,6 +6,8 @@ static union tcp_addr tcp_md5_client; +#define FILTER_TEST_NKEYS 16 + static int test_port = 7788; static void make_listen(int sk) { @@ -30,8 +32,8 @@ static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info, #define __cmp_ao(member) \ do { \ if (info->member != tmp.member) { \ - test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \ - tst, (size_t)info->member, (size_t)tmp.member); \ + test_fail("%s: getsockopt(): " __stringify(member) " %" PRIu64 " != %" PRIu64, \ + tst, (uint64_t)info->member, (uint64_t)tmp.member); \ return; \ } \ } while(0) @@ -813,23 +815,197 @@ static void duplicate_tests(void) setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs"); } +static void fetch_all_keys(int sk, struct tcp_ao_getsockopt *keys) +{ + socklen_t optlen = sizeof(struct tcp_ao_getsockopt); + + memset(keys, 0, sizeof(struct tcp_ao_getsockopt) * FILTER_TEST_NKEYS); + keys[0].get_all = 1; + keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &keys[0], &optlen)) + test_error("getsockopt"); +} + +static int prepare_test_keys(struct tcp_ao_getsockopt *keys) +{ + const char *test_password = "Test password number "; + struct tcp_ao_add test_ao[FILTER_TEST_NKEYS]; + char test_password_scratch[64] = {}; + u8 rcvid = 100, sndid = 100; + int sk; + + sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP); + if (sk < 0) + test_error("socket()"); + + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + snprintf(test_password_scratch, 64, "%s %d", test_password, i); + test_prepare_key(&test_ao[i], DEFAULT_TEST_ALGO, this_ip_dest, + false, false, DEFAULT_TEST_PREFIX, 0, sndid++, + rcvid++, 0, 0, strlen(test_password_scratch), + test_password_scratch); + } + test_ao[0].set_current = 1; + test_ao[1].set_rnext = 1; + /* One key with a different addr and overlapping sndid, rcvid */ + 
tcp_addr_to_sockaddr_in(&test_ao[2].addr, &this_ip_addr, 0); + test_ao[2].sndid = 100; + test_ao[2].rcvid = 100; + + /* Add keys in a random order */ + for (int i = 0; i < FILTER_TEST_NKEYS; i++) { + int randidx = rand() % (FILTER_TEST_NKEYS - i); + + if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, + &test_ao[randidx], sizeof(struct tcp_ao_add))) + test_error("setsockopt()"); + memcpy(&test_ao[randidx], &test_ao[FILTER_TEST_NKEYS - 1 - i], + sizeof(struct tcp_ao_add)); + } + + fetch_all_keys(sk, keys); + + return sk; +} + +/* Assumes passwords are unique */ +static int compare_mkts(struct tcp_ao_getsockopt *expected, int nexpected, + struct tcp_ao_getsockopt *actual, int nactual) +{ + int matches = 0; + + for (int i = 0; i < nexpected; i++) { + for (int j = 0; j < nactual; j++) { + if (memcmp(expected[i].key, actual[j].key, + TCP_AO_MAXKEYLEN) == 0) + matches++; + } + } + return nexpected - matches; +} + +static void filter_keys_checked(int sk, struct tcp_ao_getsockopt *filter, + struct tcp_ao_getsockopt *expected, + unsigned int nexpected, const char *tst) +{ + struct tcp_ao_getsockopt filtered_keys[FILTER_TEST_NKEYS] = {}; + struct tcp_ao_getsockopt all_keys[FILTER_TEST_NKEYS] = {}; + socklen_t len = sizeof(struct tcp_ao_getsockopt); + + fetch_all_keys(sk, all_keys); + memcpy(&filtered_keys[0], filter, sizeof(struct tcp_ao_getsockopt)); + filtered_keys[0].nkeys = FILTER_TEST_NKEYS; + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, filtered_keys, &len)) + test_error("getsockopt"); + if (filtered_keys[0].nkeys != nexpected) { + test_fail("wrong nr of keys, expected %u got %u", nexpected, + filtered_keys[0].nkeys); + goto out_close; + } + if (compare_mkts(expected, nexpected, filtered_keys, + filtered_keys[0].nkeys)) { + test_fail("got wrong keys back"); + goto out_close; + } + test_ok("filter keys: %s", tst); + +out_close: + close(sk); + memset(filter, 0, sizeof(struct tcp_ao_getsockopt)); +} + +static void filter_tests(void) +{ + struct tcp_ao_getsockopt 
original_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt expected_keys[FILTER_TEST_NKEYS]; + struct tcp_ao_getsockopt filter = {}; + int sk, f, nmatches; + socklen_t len; + + f = 2; + sk = prepare_test_keys(original_keys); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + memcpy(&filter.addr, &original_keys[f].addr, + sizeof(original_keys[f].addr)); + filter.prefix = original_keys[f].prefix; + filter_keys_checked(sk, &filter, &original_keys[f], 1, + "by sndid, rcvid, address"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_current) { + f = i; + break; + } + } + if (f < 0) + test_error("No current key after adding one"); + filter.is_current = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_current"); + + f = -1; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].is_rnext) { + f = i; + break; + } + } + if (f < 0) + test_error("No rnext key after adding one"); + filter.is_rnext = 1; + filter_keys_checked(sk, &filter, &original_keys[f], 1, "by is_rnext"); + + f = -1; + nmatches = 0; + sk = prepare_test_keys(original_keys); + for (int i = 0; i < original_keys[0].nkeys; i++) { + if (original_keys[i].sndid == 100) { + f = i; + memcpy(&expected_keys[nmatches], &original_keys[i], + sizeof(struct tcp_ao_getsockopt)); + nmatches++; + } + } + if (f < 0) + test_error("No key for sndid 100"); + if (nmatches != 2) + test_error("Should have 2 keys with sndid 100"); + filter.rcvid = original_keys[f].rcvid; + filter.sndid = original_keys[f].sndid; + filter.addr.ss_family = test_family; + filter_keys_checked(sk, &filter, expected_keys, nmatches, + "by sndid, rcvid"); + + sk = prepare_test_keys(original_keys); + filter.get_all = 1; + filter.nkeys = FILTER_TEST_NKEYS / 2; + len = sizeof(struct tcp_ao_getsockopt); + if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &filter, &len)) + 
test_error("getsockopt"); + if (filter.nkeys == FILTER_TEST_NKEYS) + test_ok("filter keys: correct nkeys when in.nkeys < matches"); + else + test_fail("filter keys: wrong nkeys, expected %u got %u", + FILTER_TEST_NKEYS, filter.nkeys); +} + static void *client_fn(void *arg) { if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1) test_error("Can't convert ip address"); extend_tests(); einval_tests(); + filter_tests(); duplicate_tests(); - /* - * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters - * returning proper nr & keys; - */ return NULL; } int main(int argc, char *argv[]) { - test_init(120, client_fn, NULL); + test_init(126, client_fn, NULL); return 0; } diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c index 6b59a652159f..a1467b64390a 100644 --- a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c +++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c @@ -6,6 +6,7 @@ #define fault(type) (inj == FAULT_ ## type) static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver."; static const char *ao_password = DEFAULT_TEST_PASSWORD; +static volatile int sk_pair; static union tcp_addr client2; static union tcp_addr client3; @@ -41,10 +42,10 @@ static void try_accept(const char *tst_name, unsigned int port, const char *cnt_name, test_cnt cnt_expected, int needs_tcp_md5, fault_t inj) { - struct tcp_ao_counters ao_cnt1, ao_cnt2; + struct tcp_counters cnt1, cnt2; uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */ - int lsk, err, sk = 0; - time_t timeout; + test_cnt poll_cnt = (cnt_expected == TEST_CNT_GOOD) ? 
0 : cnt_expected; + int lsk, err, sk = -1; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) return; @@ -63,21 +64,25 @@ static void try_accept(const char *tst_name, unsigned int port, if (cnt_name) before_cnt = netstat_get_one(cnt_name, NULL); - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt1)) + test_error("test_get_tcp_counters()"); synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - err = test_wait_fd(lsk, timeout, 0); + err = test_skpair_wait_poll(lsk, 0, poll_cnt, &sk_pair); + synchronize_threads(); /* connect()/accept() timeouts */ if (err == -ETIMEDOUT) { + sk_pair = err; if (!fault(TIMEOUT)) - test_fail("timed out for accept()"); + test_fail("%s: timed out for accept()", tst_name); + } else if (err == -EKEYREJECTED) { + if (!fault(KEYREJECT)) + test_fail("%s: key was rejected", tst_name); } else if (err < 0) { - test_error("test_wait_fd()"); + test_error("test_skpair_wait_poll()"); } else { if (fault(TIMEOUT)) - test_fail("ready to accept"); + test_fail("%s: ready to accept", tst_name); sk = accept(lsk, NULL, NULL); if (sk < 0) { @@ -88,8 +93,8 @@ static void try_accept(const char *tst_name, unsigned int port, } } - if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2)) - test_error("test_get_tcp_ao_counters()"); + if (ao_addr && test_get_tcp_counters(lsk, &cnt2)) + test_error("test_get_tcp_counters()"); close(lsk); if (!cnt_name) { @@ -100,18 +105,18 @@ static void try_accept(const char *tst_name, unsigned int port, after_cnt = netstat_get_one(cnt_name, NULL); if (after_cnt <= before_cnt) { - test_fail("%s: %s counter did not increase: %zu <= %zu", + test_fail("%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64, tst_name, cnt_name, after_cnt, before_cnt); } else { - test_ok("%s: counter %s increased %zu => %zu", + test_ok("%s: counter %s increased %" PRIu64 " => %" 
PRIu64, tst_name, cnt_name, before_cnt, after_cnt); } if (ao_addr) - test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected); + test_assert_counters(tst_name, &cnt1, &cnt2, cnt_expected); out: synchronize_threads(); /* test_kill_sk() */ - if (sk > 0) + if (sk >= 0) test_kill_sk(sk); } @@ -152,78 +157,82 @@ static void *server_fn(void *arg) server_add_routes(); - try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0, + try_accept("[server] AO server (INADDR_ANY): MD5 client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO server (INADDR_ANY): no sign client", port++, NULL, 0, &addr_any, 0, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): AO client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0); - try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, + try_accept("[server] AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, &this_ip_dest, TEST_PREFIX, true, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, + try_accept("[server] MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - 0, 1, FAULT_TIMEOUT); - try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); + try_accept("[server] MD5 server (INADDR_ANY): MD5 client", port++, 
&addr_any, 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0); - try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any, + try_accept("[server] MD5 server (INADDR_ANY): no sign client", port++, &addr_any, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound", - 0, 1, FAULT_TIMEOUT); + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("no sign server: AO client", port++, NULL, 0, + try_accept("[server] no sign server: AO client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound", - TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); - try_accept("no sign server: MD5 client", port++, NULL, 0, + TEST_CNT_NS_KEY_NOT_FOUND, 0, FAULT_TIMEOUT); + try_accept("[server] no sign server: MD5 client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected", - 0, 1, FAULT_TIMEOUT); - try_accept("no sign server: no sign client", port++, NULL, 0, + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] no sign server: no sign client", port++, NULL, 0, NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0); - try_accept("AO+MD5 server: AO client (matching)", port++, + try_accept("[server] AO+MD5 server: AO client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0); - try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++, + try_accept("[server] AO+MD5 server: AO client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (matching)", port++, + try_accept("[server] AO+MD5 server: MD5 client (matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, 
TEST_PREFIX, 0, 100, 100, 0, NULL, 0, 1, 0); - try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, + try_accept("[server] AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: MD5 client (misconfig, non-matching)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (unmatched)", port++, + 100, 100, 0, "TCPMD5Unexpected", + TEST_CNT_NS_MD5_UNEXPECTED, 1, FAULT_TIMEOUT); + try_accept("[server] AO+MD5 server: no sign client (unmatched)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "CurrEstab", 0, 1, 0); - try_accept("AO+MD5 server: no sign client (misconfig, matching AO)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching AO)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, 100, 100, 0, "TCPAORequired", TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)", + try_accept("[server] AO+MD5 server: no sign client (misconfig, matching MD5)", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT); + 100, 100, 0, "TCPMD5NotFound", + TEST_CNT_NS_MD5_NOT_FOUND, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", + /* Key rejected by the other side, failing short through skpair */ + try_accept("[server] AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); - try_accept("AO+MD5 server: client with both 
TCP-MD5 and [TCP-AO] keys", + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); + try_accept("[server] AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys", port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0, - 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT); + 100, 100, 0, NULL, 0, 1, FAULT_KEYREJECT); server_add_fail_tests(&port); @@ -258,7 +267,6 @@ static void try_connect(const char *tst_name, unsigned int port, uint8_t sndid, uint8_t rcvid, uint8_t vrf, fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -280,10 +288,10 @@ static void try_connect(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); - + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); + synchronize_threads(); /* connect()/accept() timeouts */ if (ret < 0) { + sk_pair = ret; if (fault(KEYREJECT) && ret == -EKEYREJECTED) test_ok("%s: connect() was prevented", tst_name); else if (ret == -ETIMEDOUT && fault(TIMEOUT)) @@ -303,8 +311,7 @@ static void try_connect(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } @@ -435,7 +442,6 @@ static void try_to_add(const char *tst_name, unsigned int port, int ao_vrf, uint8_t sndid, uint8_t rcvid, int needs_tcp_md5, fault_t inj) { - time_t timeout; int sk, ret; if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5)) @@ -448,10 +454,10 @@ static void try_to_add(const char *tst_name, unsigned int port, synchronize_threads(); /* preparations done */ - timeout = fault(TIMEOUT) ? 
TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC; - ret = _test_connect_socket(sk, this_ip_dest, port, timeout); + ret = test_skpair_connect_poll(sk, this_ip_dest, port, 0, &sk_pair); - if (ret <= 0) { + synchronize_threads(); /* connect()/accept() timeouts */ + if (ret < 0) { test_error("%s: connect() returned %d", tst_name, ret); goto out; } @@ -487,8 +493,7 @@ static void try_to_add(const char *tst_name, unsigned int port, out: synchronize_threads(); /* test_kill_sk() */ - /* _test_connect_socket() cleans up on failure */ - if (ret > 0) + if (ret > 0) /* test_skpair_connect_poll() cleans up on failure */ test_kill_sk(sk); } @@ -671,24 +676,38 @@ static void *client_fn(void *arg) try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + 
trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("no sign server: AO client", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("no sign server: MD5 client", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); try_connect("no sign server: no sign client", port++, NULL, 0, @@ -696,25 +715,37 @@ static void *client_fn(void *arg) try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, 0, 1, &client2); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, this_ip_addr, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, matching MD5)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); + trace_ao_event_expect(TCP_AO_KEY_NOT_FOUND, client3, this_ip_dest, + -1, port, 0, 0, 1, 0, 0, 0, 100, 100, -1); try_connect("AO+MD5 server: AO client (misconfig, non-matching)", port++, NULL, 0, &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)", port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_UNEXPECTED, client3, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)", port++, 
&addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client3); try_connect("AO+MD5 server: no sign client (unmatched)", port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3); + trace_hash_event_expect(TCP_HASH_AO_REQUIRED, client2, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching AO)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &client2); + trace_hash_event_expect(TCP_HASH_MD5_REQUIRED, this_ip_addr, + this_ip_dest, -1, port, 0, 0, 1, 0, 0, 0); try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)", port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr); @@ -736,6 +767,6 @@ static void *client_fn(void *arg) int main(int argc, char *argv[]) { - test_init(72, server_fn, client_fn); + test_init(73, server_fn, client_fn); return 0; } diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c index c1cb0c75156a..4b3f9b5e50fe 100644 --- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c +++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c @@ -26,7 +26,7 @@ #include <fcntl.h> #include <time.h> -#include "../kselftest.h" +#include "kselftest.h" #ifndef TCP_FASTOPEN_KEY #define TCP_FASTOPEN_KEY 33 diff --git a/tools/testing/selftests/net/tcp_port_share.c b/tools/testing/selftests/net/tcp_port_share.c new file mode 100644 index 000000000000..6146b62610df --- /dev/null +++ b/tools/testing/selftests/net/tcp_port_share.c @@ -0,0 +1,258 @@ +// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause +// Copyright (c) 2025 Cloudflare, Inc. + +/* Tests for TCP port sharing (bind bucket reuse). 
*/ + +#include <arpa/inet.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <fcntl.h> +#include <sched.h> +#include <stdlib.h> + +#include "kselftest_harness.h" + +#define DST_PORT 30000 +#define SRC_PORT 40000 + +struct sockaddr_inet { + union { + struct sockaddr_storage ss; + struct sockaddr_in6 v6; + struct sockaddr_in v4; + struct sockaddr sa; + }; + socklen_t len; + char str[INET6_ADDRSTRLEN + __builtin_strlen("[]:65535") + 1]; +}; + +const int one = 1; + +static int disconnect(int fd) +{ + return connect(fd, &(struct sockaddr){ AF_UNSPEC }, sizeof(struct sockaddr)); +} + +static int getsockname_port(int fd) +{ + struct sockaddr_inet addr = {}; + int err; + + addr.len = sizeof(addr); + err = getsockname(fd, &addr.sa, &addr.len); + if (err) + return -1; + + switch (addr.sa.sa_family) { + case AF_INET: + return ntohs(addr.v4.sin_port); + case AF_INET6: + return ntohs(addr.v6.sin6_port); + default: + errno = EAFNOSUPPORT; + return -1; + } +} + +static void make_inet_addr(int af, const char *ip, __u16 port, + struct sockaddr_inet *addr) +{ + const char *fmt = ""; + + memset(addr, 0, sizeof(*addr)); + + switch (af) { + case AF_INET: + addr->len = sizeof(addr->v4); + addr->v4.sin_family = af; + addr->v4.sin_port = htons(port); + inet_pton(af, ip, &addr->v4.sin_addr); + fmt = "%s:%hu"; + break; + case AF_INET6: + addr->len = sizeof(addr->v6); + addr->v6.sin6_family = af; + addr->v6.sin6_port = htons(port); + inet_pton(af, ip, &addr->v6.sin6_addr); + fmt = "[%s]:%hu"; + break; + } + + snprintf(addr->str, sizeof(addr->str), fmt, ip, port); +} + +FIXTURE(tcp_port_share) {}; + +FIXTURE_VARIANT(tcp_port_share) { + int domain; + /* IP to listen on and connect to */ + const char *dst_ip; + /* Primary IP to connect from */ + const char *src1_ip; + /* Secondary IP to connect from */ + const char *src2_ip; + /* IP to bind to in order to block the source port */ + const char *bind_ip; +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv4) { + .domain = AF_INET, + .dst_ip = 
"127.0.0.1", + .src1_ip = "127.1.1.1", + .src2_ip = "127.2.2.2", + .bind_ip = "127.3.3.3", +}; + +FIXTURE_VARIANT_ADD(tcp_port_share, ipv6) { + .domain = AF_INET6, + .dst_ip = "::1", + .src1_ip = "2001:db8::1", + .src2_ip = "2001:db8::2", + .bind_ip = "2001:db8::3", +}; + +FIXTURE_SETUP(tcp_port_share) +{ + int sc; + + ASSERT_EQ(unshare(CLONE_NEWNET), 0); + ASSERT_EQ(system("ip link set dev lo up"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::1/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::2/32 nodad"), 0); + ASSERT_EQ(system("ip addr add dev lo 2001:db8::3/32 nodad"), 0); + + sc = open("/proc/sys/net/ipv4/ip_local_port_range", O_WRONLY); + ASSERT_GE(sc, 0); + ASSERT_GT(dprintf(sc, "%hu %hu\n", SRC_PORT, SRC_PORT), 0); + ASSERT_EQ(close(sc), 0); +} + +FIXTURE_TEARDOWN(tcp_port_share) {} + +/* Verify that an ephemeral port becomes available again after the socket + * bound to it and blocking it from reuse is closed. + */ +TEST_F(tcp_port_share, can_reuse_port_after_bind_and_close) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + 
ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Bind to <bind_ip>:<SRC_PORT>. Block the port from reuse. */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Try to connect from <src2_ip>:<SRC_PORT>. Expect failure. */ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + ASSERT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + /* Unbind from <bind_ip>:<SRC_PORT>. Unblock the port for reuse. */ + ASSERT_EQ(close(pb), 0); + + /* Connect again from <src2_ip>:<SRC_PORT> */ + EXPECT_EQ(connect(c2, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + EXPECT_EQ(getsockname_port(c2), SRC_PORT); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +/* Verify that a socket auto-bound during connect() blocks port reuse after + * disconnect (connect(AF_UNSPEC)) followed by an explicit port bind(). 
+ */ +TEST_F(tcp_port_share, port_block_after_disconnect) +{ + const typeof(variant) v = variant; + struct sockaddr_inet addr; + int c1, c2, ln, pb; + + /* Listen on <dst_ip>:<DST_PORT> */ + ln = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(ln, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(ln, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(bind(ln, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + ASSERT_EQ(listen(ln, 2), 0); + + /* Connect from <src1_ip>:<SRC_PORT> */ + c1 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c1, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(c1, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src1_ip, 0, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + ASSERT_EQ(connect(c1, &addr.sa, addr.len), 0) TH_LOG("connect(%s): %m", addr.str); + ASSERT_EQ(getsockname_port(c1), SRC_PORT); + + /* Disconnect the socket and bind it to <bind_ip>:<SRC_PORT> to block the port */ + ASSERT_EQ(disconnect(c1), 0) TH_LOG("disconnect: %m"); + ASSERT_EQ(setsockopt(c1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(c1, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + /* Trigger port-addr bucket state update with another bind() and close() */ + pb = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(pb, 0) TH_LOG("socket(): %m"); + ASSERT_EQ(setsockopt(pb, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->bind_ip, SRC_PORT, &addr); + ASSERT_EQ(bind(pb, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + ASSERT_EQ(close(pb), 0); + + /* Connect from <src2_ip>:<SRC_PORT>. Expect failure. 
*/ + c2 = socket(v->domain, SOCK_STREAM, 0); + ASSERT_GE(c2, 0) TH_LOG("socket: %m"); + ASSERT_EQ(setsockopt(c2, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one)), 0); + + make_inet_addr(v->domain, v->src2_ip, 0, &addr); + ASSERT_EQ(bind(c2, &addr.sa, addr.len), 0) TH_LOG("bind(%s): %m", addr.str); + + make_inet_addr(v->domain, v->dst_ip, DST_PORT, &addr); + EXPECT_EQ(connect(c2, &addr.sa, addr.len), -1) TH_LOG("connect(%s)", addr.str); + EXPECT_EQ(errno, EADDRNOTAVAIL) TH_LOG("%m"); + + ASSERT_EQ(close(c2), 0); + ASSERT_EQ(close(c1), 0); + ASSERT_EQ(close(ln), 0); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/test_blackhole_dev.sh b/tools/testing/selftests/net/test_blackhole_dev.sh deleted file mode 100755 index 3119b80e711f..000000000000 --- a/tools/testing/selftests/net/test_blackhole_dev.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh -# SPDX-License-Identifier: GPL-2.0 -# Runs blackhole-dev test using blackhole-dev kernel module - -if /sbin/modprobe -q test_blackhole_dev ; then - /sbin/modprobe -q -r test_blackhole_dev; - echo "test_blackhole_dev: ok"; -else - echo "test_blackhole_dev: [FAIL]"; - exit 1; -fi diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 1b3f89e2b86e..2a7224fe74f2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -315,6 +315,29 @@ backup_port() tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" + # Check that packets are forwarded out of vx0 when swp1 is + # administratively down and out of swp1 when it is administratively up + # again. + run_cmd "ip -n $sw1 link set dev swp1 down" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 
0 "swp1 administratively down" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 up" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 administratively up" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" @@ -322,9 +345,9 @@ backup_port() log_test $? 1 "vx0 not configured as backup port of swp1" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "Forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" @@ -332,9 +355,9 @@ backup_port() log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "No forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 
0 "No forwarding out of vx0" } diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 02b986c9c247..9067197c9055 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -51,7 +51,9 @@ ret=0 # All tests in this script. Can be overridden with -t option. TESTS=" neigh_suppress_arp + neigh_suppress_uc_arp neigh_suppress_ns + neigh_suppress_uc_ns neigh_vlan_suppress_arp neigh_vlan_suppress_ns " @@ -388,6 +390,52 @@ neigh_suppress_arp() neigh_suppress_arp_common $vid $sip $tip } +neigh_suppress_uc_arp_common() +{ + local vid=$1; shift + local sip=$1; shift + local tip=$1; shift + local tmac + + echo + echo "Unicast ARP, per-port ARP suppression - VLAN $vid" + echo "-----------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass" + + run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast ARP, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 
0 "Unicast ARP, suppression on, h2 filter" +} + +neigh_suppress_uc_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=192.0.2.2 + + neigh_suppress_uc_arp_common $vid $sip $tip + + vid=20 + sip=192.0.2.17 + tip=192.0.2.18 + neigh_suppress_uc_arp_common $vid $sip $tip +} + neigh_suppress_ns_common() { local vid=$1; shift @@ -494,6 +542,78 @@ neigh_suppress_ns() neigh_suppress_ns_common $vid $saddr $daddr $maddr } +icmpv6_header_get() +{ + local csum=$1; shift + local tip=$1; shift + local type + local p + + # Type 135 (Neighbor Solicitation), hex format + type="87" + p=$(: + )"$type:"$( : ICMPv6.type + )"00:"$( : ICMPv6.code + )"$csum:"$( : ICMPv6.checksum + )"00:00:00:00:"$( : Reserved + )"$tip:"$( : Target Address + ) + echo $p +} + +neigh_suppress_uc_ns_common() +{ + local vid=$1; shift + local sip=$1; shift + local dip=$1; shift + local full_dip=$1; shift + local csum=$1; shift + local tmac + + echo + echo "Unicast NS, per-port NS suppression - VLAN $vid" + echo "---------------------------------------------" + + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 
0 "\"neigh_suppress\" is on" + + tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass" + + run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q" + tc_check_packets $h1 "dev eth0.$vid ingress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h1 filter" + tc_check_packets $h2 "dev eth0.$vid egress" 101 1 + log_test $? 0 "Unicast NS, suppression on, h2 filter" +} + +neigh_suppress_uc_ns() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=2001:db8:1::2 + local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum="ef:79" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum + + vid=20 + saddr=2001:db8:2::1 + daddr=2001:db8:2::2 + full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02 + csum="ef:76" + + neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum +} + neigh_vlan_suppress_arp() { local vid1=10 @@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then exit $ksft_skip fi +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + bridge link help 2>&1 | grep -q "neigh_vlan_suppress" if [ $? 
-ne 0 ]; then echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support" diff --git a/tools/testing/selftests/net/test_neigh.sh b/tools/testing/selftests/net/test_neigh.sh new file mode 100755 index 000000000000..7c594bf6ead0 --- /dev/null +++ b/tools/testing/selftests/net/test_neigh.sh @@ -0,0 +1,366 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh +TESTS=" + extern_valid_ipv4 + extern_valid_ipv6 +" +VERBOSE=0 + +################################################################################ +# Utilities + +run_cmd() +{ + local cmd="$1" + local out + local stderr="2>/dev/null" + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + stderr= + fi + + out=$(eval "$cmd" "$stderr") + rc=$? + if [ "$VERBOSE" -eq 1 ] && [ -n "$out" ]; then + echo " $out" + fi + + return $rc +} + +################################################################################ +# Setup + +setup() +{ + set -e + + setup_ns ns1 ns2 + + ip -n "$ns1" link add veth0 type veth peer name veth1 netns "$ns2" + ip -n "$ns1" link set dev veth0 up + ip -n "$ns2" link set dev veth1 up + + ip -n "$ns1" address add 192.0.2.1/24 dev veth0 + ip -n "$ns1" address add 2001:db8:1::1/64 dev veth0 nodad + ip -n "$ns2" address add 192.0.2.2/24 dev veth1 + ip -n "$ns2" address add 2001:db8:1::2/64 dev veth1 nodad + + ip netns exec "$ns1" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + ip netns exec "$ns2" sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 + + sleep 5 + + set +e +} + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +extern_valid_common() +{ + local af_str=$1; shift + local ip_addr=$1; shift + local tbl_name=$1; shift + local subnet=$1; shift + local mac + + mac=$(ip -n "$ns2" -j link show dev veth1 | jq -r '.[]["address"]') + + RET=0 + + # Check that simple addition works. 
+ run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "No \"extern_valid\" flag after addition" + + log_test "$af_str \"extern_valid\" flag: Add entry" + + RET=0 + + # Check that an entry cannot be added with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Add with an invalid state" + + RET=0 + + # Check that entry cannot be added with both "extern_valid" flag and + # "use" / "managed" flag. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + check_fail $? "Managed to add an entry with \"extern_valid\" flag and \"use\" flag" + + log_test "$af_str \"extern_valid\" flag: Add with \"use\" flag" + + RET=0 + + # Check that "extern_valid" flag can be toggled using replace. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Did not manage to set \"extern_valid\" flag with replace" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_fail $? "Did not manage to clear \"extern_valid\" flag with replace" + + log_test "$af_str \"extern_valid\" flag: Replace entry" + + RET=0 + + # Check that an existing "extern_valid" entry can be marked as + # "managed". 
+ run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid managed" + check_err $? "Did not manage to add \"managed\" flag to an existing \"extern_valid\" entry" + + log_test "$af_str \"extern_valid\" flag: Replace entry with \"managed\" flag" + + RET=0 + + # Check that entry cannot be replaced with "extern_valid" flag and an + # invalid state. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr nud none dev veth0 extern_valid" + check_fail $? "Managed to replace an entry with \"extern_valid\" flag and an invalid state" + + log_test "$af_str \"extern_valid\" flag: Replace with an invalid state" + + RET=0 + + # Check that an "extern_valid" entry is flushed when the interface is + # put administratively down. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 link set dev veth0 down" + run_cmd "ip -n $ns1 link set dev veth0 up" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_fail $? "\"extern_valid\" entry not flushed upon interface down" + + log_test "$af_str \"extern_valid\" flag: Interface down" + + RET=0 + + # Check that an "extern_valid" entry is not flushed when the interface + # loses its carrier. + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns2 link set dev veth1 down" + run_cmd "ip -n $ns2 link set dev veth1 up" + run_cmd "sleep 2" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0" + check_err $? 
"\"extern_valid\" entry flushed upon carrier down" + + log_test "$af_str \"extern_valid\" flag: Carrier down" + + RET=0 + + # Check that when entry transitions to "reachable" state it maintains + # the "extern_valid" flag. Wait "delay_probe" seconds for ARP request / + # NS to be sent. + local delay_probe + + delay_probe=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["delay_probe"]') + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + run_cmd "sleep $((delay_probe / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"REACHABLE\"" + check_err $? "Entry did not transition to \"reachable\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after transition to \"reachable\" state" + + log_test "$af_str \"extern_valid\" flag: Transition to \"reachable\" state" + + RET=0 + + # Drop all packets, trigger resolution and check that entry goes back + # to "stale" state instead of "failed". 
+ local mcast_reprobes + local retrans_time + local ucast_probes + local app_probes + local probes + local delay + + run_cmd "ip -n $ns1 neigh flush dev veth0" + run_cmd "tc -n $ns2 qdisc add dev veth1 clsact" + run_cmd "tc -n $ns2 filter add dev veth1 ingress proto all matchall action drop" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh replace $ip_addr lladdr $mac nud stale dev veth0 extern_valid use" + retrans_time=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["retrans"]') + ucast_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["ucast_probes"]') + app_probes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["app_probes"]') + mcast_reprobes=$(ip -n "$ns1" -j ntable show dev veth0 name "$tbl_name" | jq '.[]["mcast_reprobes"]') + delay=$((delay_probe + (ucast_probes + app_probes + mcast_reprobes) * retrans_time)) + run_cmd "sleep $((delay / 1000 + 2))" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"STALE\"" + check_err $? "Entry did not return to \"stale\" state" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry did not maintain \"extern_valid\" flag after returning to \"stale\" state" + probes=$(ip -n "$ns1" -j -s neigh get "$ip_addr" dev veth0 | jq '.[]["probes"]') + if [[ $probes -eq 0 ]]; then + check_err 1 "No probes were sent" + fi + + log_test "$af_str \"extern_valid\" flag: Transition back to \"stale\" state" + + run_cmd "tc -n $ns2 qdisc del dev veth1 clsact" + + RET=0 + + # Forced garbage collection runs whenever the number of entries is + # larger than "thresh3" and deletes stale entries that have not been + # updated in the last 5 seconds. + # + # Check that an "extern_valid" entry survives a forced garbage + # collection. Add an entry, wait 5 seconds and add more entries than + # "thresh3" so that forced garbage collection will run. 
+ # + # Note that the garbage collection thresholds are global resources and + # that changes in the initial namespace affect all the namespaces. + local forced_gc_runs_t0 + local forced_gc_runs_t1 + local orig_thresh1 + local orig_thresh2 + local orig_thresh3 + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_thresh2=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh2")) | .["thresh2"]') + orig_thresh3=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh3")) | .["thresh3"]') + run_cmd "ip ntable change name $tbl_name thresh3 10 thresh2 9 thresh1 8" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + run_cmd "sleep 5" + forced_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + forced_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("forced_gc_runs")) | .["forced_gc_runs"]') + if [[ $forced_gc_runs_t1 -eq $forced_gc_runs_t0 ]]; then + check_err 1 "Forced garbage collection did not run" + fi + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive forced garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? 
"Entry without \"extern_valid\" flag survived forced garbage collection" + + log_test "$af_str \"extern_valid\" flag: Forced garbage collection" + + run_cmd "ip ntable change name $tbl_name thresh3 $orig_thresh3 thresh2 $orig_thresh2 thresh1 $orig_thresh1" + + RET=0 + + # Periodic garbage collection runs every "base_reachable"/2 seconds and + # if the number of entries is larger than "thresh1", then it deletes + # stale entries that have not been used in the last "gc_stale" seconds. + # + # Check that an "extern_valid" entry survives a periodic garbage + # collection. Add an "extern_valid" entry, add more than "thresh1" + # regular entries, wait "base_reachable" (longer than "gc_stale") + # seconds and check that the "extern_valid" entry was not deleted. + # + # Note that the garbage collection thresholds and "base_reachable" are + # global resources and that changes in the initial namespace affect all + # the namespaces. + local periodic_gc_runs_t0 + local periodic_gc_runs_t1 + local orig_base_reachable + local orig_gc_stale + + run_cmd "ip -n $ns1 neigh flush dev veth0" + orig_thresh1=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["thresh1"]') + orig_base_reachable=$(ip -j ntable show name "$tbl_name" | jq '.[] | select(has("thresh1")) | .["base_reachable"]') + run_cmd "ip ntable change name $tbl_name thresh1 10 base_reachable 10000" + orig_gc_stale=$(ip -n "$ns1" -j ntable show name "$tbl_name" dev veth0 | jq '.[]["gc_stale"]') + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale 1000" + run_cmd "ip -n $ns1 neigh add $ip_addr lladdr $mac nud stale dev veth0 extern_valid" + run_cmd "ip -n $ns1 neigh add ${subnet}3 lladdr $mac nud stale dev veth0" + # Wait orig_base_reachable/2 for the new interval to take effect. 
+ run_cmd "sleep $(((orig_base_reachable / 1000) / 2 + 2))" + for i in {1..20}; do + run_cmd "ip -n $ns1 neigh add ${subnet}$((i + 4)) nud none dev veth0" + done + periodic_gc_runs_t0=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + run_cmd "sleep 10" + periodic_gc_runs_t1=$(ip -j -s ntable show name "$tbl_name" | jq '.[] | select(has("periodic_gc_runs")) | .["periodic_gc_runs"]') + [[ $periodic_gc_runs_t1 -ne $periodic_gc_runs_t0 ]] + check_err $? "Periodic garbage collection did not run" + run_cmd "ip -n $ns1 neigh get $ip_addr dev veth0 | grep \"extern_valid\"" + check_err $? "Entry with \"extern_valid\" flag did not survive periodic garbage collection" + run_cmd "ip -n $ns1 neigh get ${subnet}3 dev veth0" + check_fail $? "Entry without \"extern_valid\" flag survived periodic garbage collection" + + log_test "$af_str \"extern_valid\" flag: Periodic garbage collection" + + run_cmd "ip -n $ns1 ntable change name $tbl_name dev veth0 gc_stale $orig_gc_stale" + run_cmd "ip ntable change name $tbl_name thresh1 $orig_thresh1 base_reachable $orig_base_reachable" +} + +extern_valid_ipv4() +{ + extern_valid_common "IPv4" 192.0.2.2 "arp_cache" 192.0.2. +} + +extern_valid_ipv6() +{ + extern_valid_common "IPv6" 2001:db8:1::2 "ndisc_cache" 2001:db8:1:: +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command jq + +if ! 
ip neigh help 2>&1 | grep -q "extern_valid"; then + echo "SKIP: iproute2 ip too old, missing \"extern_valid\" support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup; $t; cleanup_all_ns; +done 
diff --git a/tools/testing/selftests/net/test_so_rcv.sh b/tools/testing/selftests/net/test_so_rcv.sh
new file mode 100755
index 000000000000..d8aa4362879d
--- /dev/null
+++ b/tools/testing/selftests/net/test_so_rcv.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# For every address in HOSTS and every entry in TESTS, start ./so_rcv_listener
+# in a namespace, run ./cmsg_sender against it with the same option, and count
+# the combination as failed if either program exits non-zero.
+
+source lib.sh
+
+HOSTS=("127.0.0.1" "::1")
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+# Test name -> option handed to both the listener and the sender.
+declare -A TESTS=(
+	["SO_RCVPRIORITY"]="-P 2"
+	["SO_RCVMARK"]="-M 3"
+)
+
+# Account for one finished test; $1 is its status (non-zero means failed).
+check_result() {
+	((TOTAL_TESTS++))
+	if [ "$1" -ne 0 ]; then
+		((FAILED_TESTS++))
+	fi
+}
+
+cleanup()
+{
+	cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+for HOST in "${HOSTS[@]}"; do
+	PROTOCOL="IPv4"
+	if [[ "$HOST" == "::1" ]]; then
+		PROTOCOL="IPv6"
+	fi
+	for test_name in "${!TESTS[@]}"; do
+		echo "Running $test_name test, $PROTOCOL"
+		arg=${TESTS[$test_name]}
+
+		ip netns exec $NS ./so_rcv_listener $arg $HOST $PORT &
+		LISTENER_PID=$!
+
+		sleep 0.5
+
+		if ! ip netns exec $NS ./cmsg_sender $arg $HOST $PORT; then
+			echo "Sender failed for $test_name, $PROTOCOL"
+			kill "$LISTENER_PID" 2>/dev/null
+			wait "$LISTENER_PID"
+			check_result 1
+			continue
+		fi
+
+		wait "$LISTENER_PID"
+		LISTENER_EXIT_CODE=$?
+
+		if [ "$LISTENER_EXIT_CODE" -eq 0 ]; then
+			echo "Rcv test OK for $test_name, $PROTOCOL"
+			check_result 0
+		else
+			echo "Rcv test FAILED for $test_name, $PROTOCOL"
+			check_result 1
+		fi
+	done
+done
+
+# Fall back to the numeric kselftest codes: should lib.sh not define the
+# upper-case KSFT_* names, a bare "exit" would return the echo's status 0.
+if [ "$FAILED_TESTS" -ne 0 ]; then
+	echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+	exit "${KSFT_FAIL:-1}"
+else
+	echo "OK - All $TOTAL_TESTS tests passed"
+	exit "${KSFT_PASS:-0}"
+fi
diff --git a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh index 2d442cdab11e..8b414d0edada 100755 --- a/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh +++ b/tools/testing/selftests/net/test_vxlan_fdb_changelink.sh @@ -1,29 +1,114 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -# Check FDB default-remote handling across "ip link set". +ALL_TESTS=" + test_set_remote + test_change_mc_remote +" +source lib.sh check_remotes() { local what=$1; shift local N=$(bridge fdb sh dev vx | grep 00:00:00:00:00:00 | wc -l) - echo -ne "expected two remotes after $what\t" - if [[ $N != 2 ]]; then - echo "[FAIL]" - EXIT_STATUS=1 + ((N == 2)) + check_err $? "expected 2 remotes after $what, got $N" +} + +# Check FDB default-remote handling across "ip link set". 
+test_set_remote() +{ + RET=0 + + adf_ip_link_add vx up type vxlan id 2000 dstport 4789 + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent + bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent + check_remotes "fdb append" + + ip link set dev vx type vxlan remote 192.0.2.30 + check_remotes "link set" + + log_test 'FDB default-remote handling across "ip link set"' +} + +fmt_remote() +{ + local addr=$1; shift + + if [[ $addr == 224.* ]]; then + echo "group $addr" else - echo "[ OK ]" + echo "remote $addr" fi } -ip link add name vx up type vxlan id 2000 dstport 4789 -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.20 self permanent -bridge fdb ap dev vx 00:00:00:00:00:00 dst 192.0.2.30 self permanent -check_remotes "fdb append" +change_remote() +{ + local remote=$1; shift + + ip link set dev vx type vxlan $(fmt_remote $remote) dev v1 +} + +check_membership() +{ + local check_vec=("$@") + + local memberships + memberships=$( + netstat -n --groups | + sed -n '/^v1\b/p' | + grep -o '[^ ]*$' + ) + check_err $? "Couldn't obtain group memberships" + + local item + for item in "${check_vec[@]}"; do + eval "local $item" + echo "$memberships" | grep -q "\b$group\b" + check_err_fail $fail $? 
"$group is_ex reported in IGMP query response" + done +} + +test_change_mc_remote() +{ + check_command netstat || return + + adf_ip_link_add v1 up type veth peer name v2 + adf_ip_link_set_up v2 + + RET=0 + + adf_ip_link_add vx up type vxlan dstport 4789 \ + local 192.0.2.1 $(fmt_remote 224.1.1.1) dev v1 vni 1000 + + check_membership "group=224.1.1.1 fail=0" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after VXLAN creation" + + RET=0 + + change_remote 224.1.1.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=0" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->MC" + + RET=0 + + change_remote 192.0.2.2 + check_membership "group=224.1.1.1 fail=1" \ + "group=224.1.1.2 fail=1" \ + "group=224.1.1.3 fail=1" + + log_test "MC group report after changing VXLAN remote MC->UC" +} + +trap defer_scopes_cleanup EXIT -ip link set dev vx type vxlan remote 192.0.2.30 -check_remotes "link set" +tests_run -ip link del dev vx exit $EXIT_STATUS 
diff --git a/tools/testing/selftests/net/test_vxlan_nh.sh b/tools/testing/selftests/net/test_vxlan_nh.sh
new file mode 100755
index 000000000000..20f3369f776b
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nh.sh
@@ -0,0 +1,228 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+TESTS="
+	basic_tx_ipv4
+	basic_tx_ipv6
+	learning
+	proxy_ipv4
+	proxy_ipv6
+"
+VERBOSE=0
+
+################################################################################
+# Utilities
+
+# Run the command line given in $1 through eval and return its exit status.
+# Outside verbose mode the command's stderr is discarded; in verbose mode the
+# command line and its captured stdout are echoed.
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local rc
+	local stderr="2>/dev/null"
+
+	# -v may be repeated (VERBOSE is a counter): any level >= 1 is verbose.
+	if [ "$VERBOSE" -ge 1 ]; then
+		echo "COMMAND: $cmd"
+		stderr=
+	fi
+
+	out=$(eval "$cmd" "$stderr")
+	rc=$?
+	if [ "$VERBOSE" -ge 1 ] && [ -n "$out" ]; then
+		echo " $out"
+	fi
+
+	return $rc
+}
+
+################################################################################ +# Cleanup + +exit_cleanup_all() +{ + cleanup_all_ns + exit "${EXIT_STATUS}" +} + +################################################################################ +# Tests + +nh_stats_get() +{ + ip -n "$ns1" -s -j nexthop show id 10 | jq ".[][\"group_stats\"][][\"packets\"]" +} + +tc_stats_get() +{ + tc_rule_handle_stats_get "dev dummy1 egress" 101 ".packets" "-n $ns1" +} + +basic_tx_common() +{ + local af_str=$1; shift + local proto=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + + RET=0 + + # Test basic Tx functionality. Check that stats are incremented on + # both the FDB nexthop group and the egress device. + + run_cmd "ip -n $ns1 link add name dummy1 up type dummy" + run_cmd "ip -n $ns1 route add $remote_addr/$plen dev dummy1" + run_cmd "tc -n $ns1 qdisc add dev dummy1 clsact" + run_cmd "tc -n $ns1 filter add dev dummy1 egress proto $proto pref 1 handle 101 flower ip_proto udp dst_ip $remote_addr dst_port 4789 action pass" + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789" + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a own -b 00:11:22:33:44:55 -c 1 -q" + + busywait "$BUSYWAIT_TIMEOUT" until_counter_is "== 1" nh_stats_get > /dev/null + check_err $? 
"tc filter stats did not increase" + + log_test "VXLAN FDB nexthop: $af_str basic Tx" +} + +basic_tx_ipv4() +{ + basic_tx_common "IPv4" ipv4 192.0.2.1 32 192.0.2.2 +} + +basic_tx_ipv6() +{ + basic_tx_common "IPv6" ipv6 2001:db8:1::1 128 2001:db8:1::2 +} + +learning() +{ + RET=0 + + # When learning is enabled on the VXLAN device, an incoming packet + # might try to refresh an FDB entry that points to an FDB nexthop group + # instead of an ordinary remote destination. Check that the kernel does + # not crash in this situation. + + run_cmd "ip -n $ns1 address add 192.0.2.1/32 dev lo" + run_cmd "ip -n $ns1 address add 192.0.2.2/32 dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via 192.0.2.3 fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local 192.0.2.1 dstport 12345 localbypass" + run_cmd "ip -n $ns1 link add name vx1 up type vxlan id 10020 local 192.0.2.2 dstport 54321 learning" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static dst 192.0.2.2 port 54321 vni 10020" + run_cmd "bridge -n $ns1 fdb add 00:aa:bb:cc:dd:ee dev vx1 self static nhid 10" + + run_cmd "ip netns exec $ns1 mausezahn vx0 -a 00:aa:bb:cc:dd:ee -b 00:11:22:33:44:55 -c 1 -q" + + log_test "VXLAN FDB nexthop: learning" +} + +proxy_common() +{ + local af_str=$1; shift + local local_addr=$1; shift + local plen=$1; shift + local remote_addr=$1; shift + local neigh_addr=$1; shift + local ping_cmd=$1; shift + + RET=0 + + # When the "proxy" option is enabled on the VXLAN device, the device + # will suppress ARP requests and IPv6 Neighbor Solicitation messages if + # it is able to reply on behalf of the remote host. That is, if a + # matching and valid neighbor entry is configured on the VXLAN device + # whose MAC address is not behind the "any" remote (0.0.0.0 / ::). The + # FDB entry for the neighbor's MAC address might point to an FDB + # nexthop group instead of an ordinary remote destination. 
Check that + # the kernel does not crash in this situation. + + run_cmd "ip -n $ns1 address add $local_addr/$plen dev lo" + + run_cmd "ip -n $ns1 nexthop add id 1 via $remote_addr fdb" + run_cmd "ip -n $ns1 nexthop add id 10 group 1 fdb" + + run_cmd "ip -n $ns1 link add name vx0 up type vxlan id 10010 local $local_addr dstport 4789 proxy" + + run_cmd "ip -n $ns1 neigh add $neigh_addr lladdr 00:11:22:33:44:55 nud perm dev vx0" + + run_cmd "bridge -n $ns1 fdb add 00:11:22:33:44:55 dev vx0 self static nhid 10" + + run_cmd "ip netns exec $ns1 $ping_cmd" + + log_test "VXLAN FDB nexthop: $af_str proxy" +} + +proxy_ipv4() +{ + proxy_common "IPv4" 192.0.2.1 32 192.0.2.2 192.0.2.3 \ + "arping -b -c 1 -s 192.0.2.1 -I vx0 192.0.2.3" +} + +proxy_ipv6() +{ + proxy_common "IPv6" 2001:db8:1::1 128 2001:db8:1::2 2001:db8:1::3 \ + "ndisc6 -r 1 -s 2001:db8:1::1 -w 1 2001:db8:1::3 vx0" +} + +################################################################################ +# Usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) + -p Pause on fail + -v Verbose mode (show commands and output) +EOF +} + +################################################################################ +# Main + +while getopts ":t:pvh" opt; do + case $opt in + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=$((VERBOSE + 1));; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +require_command mausezahn +require_command arping +require_command ndisc6 +require_command jq + +if ! 
ip nexthop help 2>&1 | grep -q "stats"; then + echo "SKIP: iproute2 ip too old, missing nexthop stats support" + exit "$ksft_skip" +fi + +trap exit_cleanup_all EXIT + +for t in $TESTS +do + setup_ns ns1; $t; cleanup_all_ns; +done diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh index 6127a78ee988..8deacc565afa 100755 --- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh +++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh @@ -146,18 +146,17 @@ run_cmd() } check_hv_connectivity() { - ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null - sleep 1 - ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null + slowwait 5 ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null return $? } check_vm_connectivity() { - run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" + slowwait 5 run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12" log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)" - run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" + slowwait 5 run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22" log_test $? 
0 "VM connectivity over $1 (ipv6 default rdst)" } 
diff --git a/tools/testing/selftests/net/tfo.c b/tools/testing/selftests/net/tfo.c
new file mode 100644
index 000000000000..eb3cac5e583c
--- /dev/null
+++ b/tools/testing/selftests/net/tfo.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+#include <netinet/tcp.h>
+#include <errno.h>
+
+static int cfg_server;
+static int cfg_client;
+static int cfg_port = 8000;
+static struct sockaddr_in6 cfg_addr;
+static char *cfg_outfile;
+
+/*
+ * Fill @sin6 from the textual address @str and @port. IPv6 literals are
+ * stored as is; IPv4 literals are stored IPv4-mapped (::ffff:a.b.c.d).
+ * Returns 0 on success, -1 when @str parses as neither.
+ */
+static int parse_address(const char *str, int port, struct sockaddr_in6 *sin6)
+{
+	int ret;
+
+	sin6->sin6_family = AF_INET6;
+	sin6->sin6_port = htons(port);
+
+	ret = inet_pton(sin6->sin6_family, str, &sin6->sin6_addr);
+	if (ret != 1) {
+		/* fallback to plain IPv4 */
+		ret = inet_pton(AF_INET, str, &sin6->sin6_addr.s6_addr32[3]);
+		if (ret != 1)
+			return -1;
+
+		/* add ::ffff prefix */
+		sin6->sin6_addr.s6_addr32[0] = 0;
+		sin6->sin6_addr.s6_addr32[1] = 0;
+		sin6->sin6_addr.s6_addr16[4] = 0;
+		sin6->sin6_addr.s6_addr16[5] = 0xffff;
+	}
+
+	return 0;
+}
+
+/*
+ * Server mode: accept one connection on cfg_addr, read SO_INCOMING_NAPI_ID
+ * from the accepted socket and write it as a decimal line to cfg_outfile.
+ */
+static void run_server(void)
+{
+	int fd, opt, connfd;
+	/* TCP_FASTOPEN is an int option; keep value and optlen int-sized. */
+	int qlen = 32;
+	socklen_t len;
+	char buf[64];
+	FILE *outfile;
+
+	/* fopen(NULL, ...) is undefined; fail cleanly when -o was not given. */
+	if (!cfg_outfile)
+		error(1, 0, "Server requires -o<outfile>");
+
+	outfile = fopen(cfg_outfile, "w");
+	if (!outfile)
+		error(1, errno, "fopen() outfile");
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket()");
+
+	opt = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
+		error(1, errno, "setsockopt(SO_REUSEADDR)");
+
+	if (setsockopt(fd, SOL_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) < 0)
+		error(1, errno, "setsockopt(TCP_FASTOPEN)");
+
+	if (bind(fd, (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+		error(1, errno, "bind()");
+
+	if (listen(fd, 5) < 0)
+		error(1, errno, "listen()");
+
+	len = sizeof(cfg_addr);
+	connfd = accept(fd, (struct sockaddr *)&cfg_addr, &len);
+	if (connfd < 0)
+		error(1, errno, "accept()");
+
+	len = sizeof(opt);
+	if (getsockopt(connfd, SOL_SOCKET, SO_INCOMING_NAPI_ID, &opt, &len) < 0)
+		error(1, errno, "getsockopt(SO_INCOMING_NAPI_ID)");
+
+	/* Consume the client's payload; a read error is reported, not fatal. */
+	if (read(connfd, buf, sizeof(buf)) < 0)
+		perror("read()");
+	fprintf(outfile, "%d\n", opt);
+
+	fclose(outfile);
+	close(connfd);
+	close(fd);
+}
+
+/* Client mode: send one short message to cfg_addr with MSG_FASTOPEN. */
+static void run_client(void)
+{
+	int fd;
+	char *msg = "Hello, world!";
+
+	fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket()");
+
+	if (sendto(fd, msg, strlen(msg), MSG_FASTOPEN,
+		   (struct sockaddr *)&cfg_addr, sizeof(cfg_addr)) < 0)
+		error(1, errno, "sendto(MSG_FASTOPEN)");
+
+	close(fd);
+}
+
+/* Print the command line synopsis and exit with status 1. */
+static void usage(const char *filepath)
+{
+	error(1, 0, "Usage: %s (-s|-c) -h<server_ip> -p<port> -o<outfile> ", filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* globals. Exactly one of -s / -c
+ * must be given; -h is only valid for the client. Exits on invalid input.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	struct sockaddr_in6 *addr6 = (void *) &cfg_addr;
+	char *addr = NULL;
+	int ret;
+	int c;
+
+	if (argc <= 1)
+		usage(argv[0]);
+
+	while ((c = getopt(argc, argv, "sch:p:o:")) != -1) {
+		switch (c) {
+		case 's':
+			if (cfg_client)
+				error(1, 0, "Pass one of -s or -c");
+			cfg_server = 1;
+			break;
+		case 'c':
+			if (cfg_server)
+				error(1, 0, "Pass one of -s or -c");
+			cfg_client = 1;
+			break;
+		case 'h':
+			addr = optarg;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 'o':
+			cfg_outfile = strdup(optarg);
+			if (!cfg_outfile)
+				error(1, 0, "outfile invalid");
+			break;
+		default:
+			/* getopt() already reported the bad option */
+			usage(argv[0]);
+			break;
+		}
+	}
+
+	/* Without a mode main() would do nothing and still exit 0. */
+	if (!cfg_server && !cfg_client)
+		error(1, 0, "Pass one of -s or -c");
+
+	if (cfg_server && addr)
+		error(1, 0, "Server cannot have -h specified");
+
+	memset(addr6, 0, sizeof(*addr6));
+	addr6->sin6_family = AF_INET6;
+	addr6->sin6_port = htons(cfg_port);
+	addr6->sin6_addr = in6addr_any;
+	if (addr) {
+		ret = parse_address(addr, cfg_port, addr6);
+		if (ret)
+			error(1, 0, "Client address parse error: %s", addr);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_server)
+		run_server();
+	else if (cfg_client)
+		run_client();
+
+	return 0;
+}
diff 
--git a/tools/testing/selftests/net/tfo_passive.sh b/tools/testing/selftests/net/tfo_passive.sh
new file mode 100755
index 000000000000..a4550511830a
--- /dev/null
+++ b/tools/testing/selftests/net/tfo_passive.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Link two netdevsim devices across two namespaces, run ./tfo as a TCP Fast
+# Open server in one and as a client in the other, and fail if the NAPI ID the
+# server wrote to its output file is missing or 0.
+source lib.sh
+
+NSIM_SV_ID=$((256 + RANDOM % 256))
+NSIM_SV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_SV_ID
+NSIM_CL_ID=$((512 + RANDOM % 256))
+NSIM_CL_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_CL_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+SERVER_IP=192.168.1.1
+CLIENT_IP=192.168.1.2
+SERVER_PORT=48675
+
+# Create the server (nssv) and client (nscl) namespaces, move each netdevsim
+# netdev into its namespace, address it and bring it up, then set the TFO
+# sysctl in nssv. Runs under "set -e": a failing step aborts the script.
+setup_ns()
+{
+	set -e
+	ip netns add nssv
+	ip netns add nscl
+
+	NSIM_SV_NAME=$(find $NSIM_SV_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_SV_SYS/net -exec basename {} \;)
+	NSIM_CL_NAME=$(find $NSIM_CL_SYS/net -maxdepth 1 -type d ! \
+		-path $NSIM_CL_SYS/net -exec basename {} \;)
+
+	ip link set $NSIM_SV_NAME netns nssv
+	ip link set $NSIM_CL_NAME netns nscl
+
+	ip netns exec nssv ip addr add "${SERVER_IP}/24" dev $NSIM_SV_NAME
+	ip netns exec nscl ip addr add "${CLIENT_IP}/24" dev $NSIM_CL_NAME
+
+	ip netns exec nssv ip link set dev $NSIM_SV_NAME up
+	ip netns exec nscl ip link set dev $NSIM_CL_NAME up
+
+	# Enable passive TFO
+	ip netns exec nssv sysctl -w net.ipv4.tcp_fastopen=519 > /dev/null
+
+	set +e
+}
+
+# Remove both test namespaces.
+cleanup_ns()
+{
+	ip netns del nscl
+	ip netns del nssv
+}
+
+###
+### Code start
+###
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_SV_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+# "exec {VAR}<file" makes bash pick a free descriptor and store it in VAR,
+# so no value needs to be chosen beforehand.
+exec {NSIM_SV_FD}</var/run/netns/nssv
+NSIM_SV_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_SV_NAME/ifindex)
+
+exec {NSIM_CL_FD}</var/run/netns/nscl
+NSIM_CL_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_CL_NAME/ifindex)
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX $NSIM_CL_FD:$NSIM_CL_IFIDX" > \
+	$NSIM_DEV_SYS_LINK
+
+if [ $? -ne 0 ]; then
+	echo "linking netdevsim1 with netdevsim2 should succeed"
+	cleanup_ns
+	exit 1
+fi
+
+out_file=$(mktemp)
+
+timeout -k 1s 30s ip netns exec nssv ./tfo \
+	-s \
+	-p ${SERVER_PORT} \
+	-o ${out_file}&
+
+wait_local_port_listen nssv ${SERVER_PORT} tcp
+
+ip netns exec nscl ./tfo -c -h ${SERVER_IP} -p ${SERVER_PORT}
+
+wait
+
+res=$(cat $out_file)
+rm $out_file
+
+# An empty result means the server exited (error or timeout) before it
+# could report a NAPI ID; do not let that pass as success.
+if [ -z "$res" ]; then
+	echo "server did not report a NAPI ID"
+	cleanup_ns
+	exit 1
+fi
+
+if [ "$res" = "0" ]; then
+	echo "got invalid NAPI ID from passive TFO socket"
+	cleanup_ns
+	exit 1
+fi
+
+echo "$NSIM_SV_FD:$NSIM_SV_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_CL_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit 0
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index f27a12d2a2c9..a3ef4b57eb5f 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -21,7 +21,7 @@ #include <sys/socket.h> #include <sys/stat.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" #define TLS_PAYLOAD_MAX_LEN 16384 #define SOL_TLS 282 @@ -44,9 +44,11 @@ struct tls_crypto_info_keys { }; static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type, - struct tls_crypto_info_keys *tls12) + struct tls_crypto_info_keys *tls12, + char key_generation) { - memset(tls12, 0, sizeof(*tls12)); + memset(tls12, key_generation, sizeof(*tls12)); + memset(tls12, 0, sizeof(struct tls_crypto_info)); switch (cipher_type) { case TLS_CIPHER_CHACHA20_POLY1305: @@ -179,13 +181,12 @@ static int tls_send_cmsg(int fd, unsigned char record_type, return sendmsg(fd, &msg, flags); } -static int tls_recv_cmsg(struct __test_metadata *_metadata, - int fd, unsigned char record_type, - void *data, size_t len, int flags) +static int __tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char *ctype, + void *data, size_t len, int flags) { char 
cbuf[CMSG_SPACE(sizeof(char))]; struct cmsghdr *cmsg; - unsigned char ctype; struct msghdr msg; struct iovec vec; int n; @@ -204,7 +205,20 @@ static int tls_recv_cmsg(struct __test_metadata *_metadata, EXPECT_NE(cmsg, NULL); EXPECT_EQ(cmsg->cmsg_level, SOL_TLS); EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE); - ctype = *((unsigned char *)CMSG_DATA(cmsg)); + if (ctype) + *ctype = *((unsigned char *)CMSG_DATA(cmsg)); + + return n; +} + +static int tls_recv_cmsg(struct __test_metadata *_metadata, + int fd, unsigned char record_type, + void *data, size_t len, int flags) +{ + unsigned char ctype; + int n; + + n = __tls_recv_cmsg(_metadata, fd, &ctype, data, len, flags); EXPECT_EQ(ctype, record_type); return n; @@ -266,6 +280,25 @@ TEST_F(tls_basic, bad_cipher) EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1); } +TEST_F(tls_basic, recseq_wrap) +{ + struct tls_crypto_info_keys tls12; + char const *test_str = "test_read"; + int send_len = 10; + + if (self->notls) + SKIP(return, "no TLS support"); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); + memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq)); + + ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + ASSERT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), -1); + EXPECT_EQ(errno, EBADMSG); +} + FIXTURE(tls) { int fd, cfd; @@ -372,7 +405,7 @@ FIXTURE_SETUP(tls) SKIP(return, "Unsupported cipher in FIPS mode"); tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); @@ -406,6 +439,8 @@ TEST_F(tls, sendfile) EXPECT_GE(filefd, 0); fstat(filefd, &st); EXPECT_GE(sendfile(self->fd, filefd, 0, st.st_size), 0); + + close(filefd); } TEST_F(tls, send_then_sendfile) @@ -427,6 +462,9 @@ TEST_F(tls, send_then_sendfile) EXPECT_GE(sendfile(self->fd, 
filefd, 0, st.st_size), 0); EXPECT_EQ(recv(self->cfd, buf, st.st_size, MSG_WAITALL), st.st_size); + + free(buf); + close(filefd); } static void chunked_sendfile(struct __test_metadata *_metadata, @@ -526,6 +564,40 @@ TEST_F(tls, msg_more) EXPECT_EQ(memcmp(buf, test_str, send_len), 0); } +TEST_F(tls, cmsg_msg_more) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + + /* we don't allow MSG_MORE with non-DATA records */ + EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, + MSG_MORE), -1); + EXPECT_EQ(errno, EINVAL); +} + +TEST_F(tls, msg_more_then_cmsg) +{ + char *test_str = "test_read"; + char record_type = 100; + int send_len = 10; + char buf[10 * 2]; + int ret; + + EXPECT_EQ(send(self->fd, test_str, send_len, MSG_MORE), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1); + + ret = tls_send_cmsg(self->fd, record_type, test_str, send_len, 0); + EXPECT_EQ(ret, send_len); + + /* initial DATA record didn't get merged with the non-DATA record */ + EXPECT_EQ(recv(self->cfd, buf, send_len * 2, 0), send_len); + + EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type, + buf, sizeof(buf), MSG_WAITALL), + send_len); +} + TEST_F(tls, msg_more_unsent) { char const *test_str = "test_read"; @@ -874,6 +946,37 @@ TEST_F(tls, peek_and_splice) EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); } +#define MAX_FRAGS 48 +TEST_F(tls, splice_short) +{ + struct iovec sendchar_iov; + char read_buf[0x10000]; + char sendbuf[0x100]; + char sendchar = 'S'; + int pipefds[2]; + int i; + + sendchar_iov.iov_base = &sendchar; + sendchar_iov.iov_len = 1; + + memset(sendbuf, 's', sizeof(sendbuf)); + + ASSERT_GE(pipe2(pipefds, O_NONBLOCK), 0); + ASSERT_GE(fcntl(pipefds[0], F_SETPIPE_SZ, (MAX_FRAGS + 1) * 0x1000), 0); + + for (i = 0; i < MAX_FRAGS; i++) + ASSERT_GE(vmsplice(pipefds[1], &sendchar_iov, 1, 0), 0); + + ASSERT_EQ(write(pipefds[1], sendbuf, sizeof(sendbuf)), sizeof(sendbuf)); + + EXPECT_EQ(splice(pipefds[0], NULL, 
self->fd, NULL, MAX_FRAGS + 0x1000, 0), + MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), 0), MAX_FRAGS + sizeof(sendbuf)); + EXPECT_EQ(recv(self->cfd, read_buf, sizeof(read_buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EAGAIN); +} +#undef MAX_FRAGS + TEST_F(tls, recvmsg_single) { char const *test_str = "test_recvmsg_single"; @@ -1156,7 +1259,7 @@ TEST_F(tls, bidir) struct tls_crypto_info_keys tls12; tls_crypto_info_init(variant->tls_version, variant->cipher_type, - &tls12); + &tls12, 0); ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12, tls12.len); @@ -1595,7 +1698,7 @@ TEST_F(tls, getsockopt) EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type); /* get the full crypto_info */ - tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0); len = expect.len; memrnd(&get, sizeof(get)); EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0); @@ -1649,6 +1752,778 @@ TEST_F(tls, recv_efault) EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0); } +#define TLS_RECORD_TYPE_HANDSHAKE 0x16 +/* key_update, length 1, update_not_requested */ +static const char key_update_msg[] = "\x18\x00\x00\x01\x00"; +static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd) +{ + size_t len = sizeof(key_update_msg); + + EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE, + (char *)key_update_msg, len, 0), + len); +} + +static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags) +{ + char buf[100]; + + EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags), + sizeof(key_update_msg)); + EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0); +} + +/* set the key to 0 then 1 for RX, immediately to 1 for TX */ +TEST_F(tls_basic, rekey_rx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) 
+ 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +/* set the key to 0 then 1 for TX, immediately to 1 for RX */ +TEST_F(tls_basic, rekey_tx) +{ + struct tls_crypto_info_keys tls12_0, tls12_1; + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_0, 0); + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &tls12_1, 1); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len); + EXPECT_EQ(ret, 0); + + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str, send_len), 0); +} + +TEST_F(tls_basic, disconnect) +{ + char const *test_str = "test_message"; + int send_len = strlen(test_str) + 1; + struct tls_crypto_info_keys key; + struct sockaddr_in addr; + char buf[20]; + int ret; + + if (self->notls) + return; + + tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128, + &key, 0); + + ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &key, key.len); + ASSERT_EQ(ret, 0); + + /* Pre-queue the 
data so that setsockopt parses it but doesn't + * dequeue it from the TCP socket. recvmsg would dequeue. + */ + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &key, key.len); + ASSERT_EQ(ret, 0); + + addr.sin_family = AF_UNSPEC; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + ret = connect(self->cfd, &addr, sizeof(addr)); + EXPECT_EQ(ret, -1); + EXPECT_EQ(errno, EOPNOTSUPP); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); +} + +TEST_F(tls, rekey) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* send after rekey */ + send_len = strlen(test_str_2) + 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 
0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_fail) +{ + char const *test_str_1 = "test_message_before_rekey"; + char const *test_str_2 = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + /* initial send/recv */ + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + + if (variant->tls_version != TLS_1_3_VERSION) { + /* just check that rekey is not supported and return */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EBUSY); + return; + } + + /* successful update */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* invalid update: change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update (RX socket): change of version */ + tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* invalid update: change of cipher */ + if (variant->cipher_type == TLS_CIPHER_AES_GCM_256) + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1); + else + tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1); + EXPECT_EQ(errno, EINVAL); + + /* send after rekey, the invalid updates shouldn't have an effect */ + send_len = strlen(test_str_2) 
+ 1; + EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* recv blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* recv non-blocking -> -EKEYEXPIRED */ + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + /* recv after rekey */ + EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1); + EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0); +} + +TEST_F(tls, rekey_peek) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + /* can't receive the KeyUpdate without a control message */ + EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); +} + +TEST_F(tls, 
splice_rekey) +{ + int send_len = TLS_PAYLOAD_MAX_LEN / 2; + char mem_send[TLS_PAYLOAD_MAX_LEN]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + struct tls_crypto_info_keys tls12; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + memrnd(mem_send, sizeof(mem_send)); + + ASSERT_GE(pipe(p), 0); + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); + + /* can't splice the KeyUpdate */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EINVAL); + + /* peek KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* can't splice before updating the key */ + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1); + EXPECT_EQ(errno, EKEYEXPIRED); + + /* update RX key */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0); +} + +TEST_F(tls, rekey_peek_splice) +{ + char const *test_str_1 = "test_message_before_rekey"; + struct tls_crypto_info_keys tls12; + int send_len; + char buf[100]; + char mem_recv[TLS_PAYLOAD_MAX_LEN]; + int p[2]; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + ASSERT_GE(pipe(p), 0); + + send_len = strlen(test_str_1) + 1; + EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), 
send_len); + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len); + EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0); + + EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len); + EXPECT_EQ(read(p[0], mem_recv, send_len), send_len); + EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0); +} + +TEST_F(tls, rekey_getsockopt) +{ + struct tls_crypto_info_keys tls12; + struct tls_crypto_info_keys tls12_get; + socklen_t len; + + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + tls_recv_keyupdate(_metadata, self->cfd, 0); + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); + + len = tls12.len; + EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0); + EXPECT_EQ(len, tls12.len); + EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0); +} + +TEST_F(tls, rekey_poll_pending) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct 
pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + /* send immediately after rekey */ + send_len = strlen(test_str) + 1; + EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + /* key hasn't been updated, expect cfd to be non-readable */ + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 0), 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + + exit(!__test_passed(_metadata)); + } +} + +TEST_F(tls, rekey_poll_delay) +{ + char const *test_str = "test_message_after_rekey"; + struct tls_crypto_info_keys tls12; + struct pollfd pfd = { }; + int send_len; + int ret; + + if (variant->tls_version != TLS_1_3_VERSION) + return; + + /* update TX key */ + tls_send_keyupdate(_metadata, self->fd); + tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1); + EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0); + + /* get KeyUpdate */ + tls_recv_keyupdate(_metadata, self->cfd, 0); + + ret = fork(); + ASSERT_GE(ret, 0); + + if (ret) { + int pid2, status; + + /* wait before installing the new key */ + sleep(1); + + /* update RX key while poll() is sleeping */ + EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0); + + sleep(1); + send_len = strlen(test_str) + 1; + 
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len); + + pid2 = wait(&status); + EXPECT_EQ(pid2, ret); + EXPECT_EQ(status, 0); + } else { + pfd.fd = self->cfd; + pfd.events = POLLIN; + EXPECT_EQ(poll(&pfd, 1, 5000), 1); + exit(!__test_passed(_metadata)); + } +} + +struct raw_rec { + unsigned int plain_len; + unsigned char plain_data[100]; + unsigned int cipher_len; + unsigned char cipher_data[128]; +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: 'Hello world' */ +static const struct raw_rec id0_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x26, 0xa2, 0x33, + 0xde, 0x8d, 0x94, 0xf0, 0x29, 0x6c, 0xb1, 0xaf, + 0x6a, 0x75, 0xb2, 0x93, 0xad, 0x45, 0xd5, 0xfd, + 0x03, 0x51, 0x57, 0x8f, 0xf9, 0xcc, 0x3b, 0x42, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:0, plaintext: '' */ +static const struct raw_rec id0_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x13, 0x38, 0x7b, + 0xa6, 0x1c, 0xdd, 0xa7, 0x19, 0x33, 0xab, 0xae, + 0x88, 0xe1, 0xd2, 0x08, 0x4f, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:0, plaintext: '' */ +static const struct raw_rec id0_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0xc5, 0x37, 0x90, + 0x70, 0x45, 0x89, 0xfb, 0x5c, 0xc7, 0x89, 0x03, + 0x68, 0x80, 0xd3, 0xd8, 0xcc, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: 'Hello world' */ +static const struct raw_rec id1_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 
0x00, 0x00, 0x00, 0x00, 0x01, 0x3a, 0x1a, 0x9c, + 0xd0, 0xa8, 0x9a, 0xd6, 0x69, 0xd6, 0x1a, 0xe3, + 0xb5, 0x1f, 0x0d, 0x2c, 0xe2, 0x97, 0x46, 0xff, + 0x2b, 0xcc, 0x5a, 0xc4, 0xa3, 0xb9, 0xef, 0xba, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:1, plaintext: '' */ +static const struct raw_rec id1_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0x3e, 0xf0, 0xfe, + 0xee, 0xd9, 0xe2, 0x5d, 0xc7, 0x11, 0x4c, 0xe6, + 0xb4, 0x7e, 0xef, 0x40, 0x2b, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:1, plaintext: '' */ +static const struct raw_rec id1_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x01, 0xce, 0xfc, 0x86, + 0xc8, 0xf0, 0x55, 0xf9, 0x47, 0x3f, 0x74, 0xdc, + 0xc9, 0xbf, 0xfe, 0x5b, 0xb1, + }, +}; + +/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_ctrl_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x2a, 0x04, 0x11, 0x3d, 0xf8, 0x64, 0x5f, 0x36, + 0x8b, 0xa8, 0xee, 0x4c, 0x6d, 0x62, 0xa5, 0x00, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: 'Hello world' */ +static const struct raw_rec id2_data_l11 = { + .plain_len = 11, + .plain_data = { + 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, + 0x72, 0x6c, 0x64, + }, + .cipher_len = 40, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x23, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xe5, 0x3d, 0x19, + 0x3d, 0xca, 0xb8, 0x16, 0xb6, 0xff, 0x79, 0x87, + 0x8e, 0xa1, 0xd0, 0xcd, 0x33, 0xb5, 0x86, 0x2b, + 0x17, 0xf1, 0x52, 0x2a, 0x55, 0x62, 0x65, 0x11, + }, +}; + 
+/* TLS 1.2, AES_CCM, ctrl, seqno:2, plaintext: '' */ +static const struct raw_rec id2_ctrl_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x16, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xdc, 0x5c, 0x0e, + 0x41, 0xdd, 0xba, 0xd3, 0xcc, 0xcf, 0x6d, 0xd9, + 0x06, 0xdb, 0x79, 0xe5, 0x5d, + }, +}; + +/* TLS 1.2, AES_CCM, data, seqno:2, plaintext: '' */ +static const struct raw_rec id2_data_l0 = { + .plain_len = 0, + .plain_data = { + }, + .cipher_len = 29, + .cipher_data = { + 0x17, 0x03, 0x03, 0x00, 0x18, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0xc3, 0xca, 0x26, + 0x22, 0xe4, 0x25, 0xfb, 0x5f, 0x6d, 0xbf, 0x83, + 0x30, 0x48, 0x69, 0x1a, 0x47, + }, +}; + +FIXTURE(zero_len) +{ + int fd, cfd; + bool notls; +}; + +FIXTURE_VARIANT(zero_len) +{ + const struct raw_rec *recs[4]; + ssize_t recv_ret[4]; +}; + +FIXTURE_VARIANT_ADD(zero_len, data_data_data) +{ + .recs = { &id0_data_l11, &id1_data_l11, &id2_data_l11, }, + .recv_ret = { 33, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0ctrl_data) +{ + .recs = { &id0_data_l11, &id1_ctrl_l0, &id2_data_l11, }, + .recv_ret = { 11, 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l11, }, + .recv_ret = { 0, 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_0ctrl) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0ctrl_0ctrl_0ctrl) +{ + .recs = { &id0_ctrl_l0, &id1_ctrl_l0, &id2_ctrl_l0, }, + .recv_ret = { 0, 0, 0, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, 0data_0data_data) +{ + .recs = { &id0_data_l0, &id1_data_l0, &id2_data_l11, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_VARIANT_ADD(zero_len, data_0data_0data) +{ + 
.recs = { &id0_data_l11, &id1_data_l0, &id2_data_l0, }, + .recv_ret = { 11, -EAGAIN, }, +}; + +FIXTURE_SETUP(zero_len) +{ + struct tls_crypto_info_keys tls12; + int ret; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); + if (self->notls) + return; + + /* Don't install keys on fd, we'll send raw records */ + ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); +} + +FIXTURE_TEARDOWN(zero_len) +{ + close(self->fd); + close(self->cfd); +} + +TEST_F(zero_len, test) +{ + const struct raw_rec *const *rec; + unsigned char buf[128]; + int rec_off; + int i; + + for (i = 0; i < 4 && variant->recs[i]; i++) + EXPECT_EQ(send(self->fd, variant->recs[i]->cipher_data, + variant->recs[i]->cipher_len, 0), + variant->recs[i]->cipher_len); + + rec = &variant->recs[0]; + rec_off = 0; + for (i = 0; i < 4; i++) { + int j, ret; + + ret = variant->recv_ret[i] >= 0 ? variant->recv_ret[i] : -1; + EXPECT_EQ(__tls_recv_cmsg(_metadata, self->cfd, NULL, + buf, sizeof(buf), MSG_DONTWAIT), ret); + if (ret == -1) + EXPECT_EQ(errno, -variant->recv_ret[i]); + if (variant->recv_ret[i] == -EAGAIN) + break; + + for (j = 0; j < ret; j++) { + while (rec_off == (*rec)->plain_len) { + rec++; + rec_off = 0; + } + EXPECT_EQ(buf[j], (*rec)->plain_data[rec_off]); + rec_off++; + } + } +}; + FIXTURE(tls_err) { int fd, cfd; @@ -1677,7 +2552,7 @@ FIXTURE_SETUP(tls_err) int ret; tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128, - &tls12); + &tls12, 0); ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls); ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls); @@ -1965,6 +2840,163 @@ TEST_F(tls_err, poll_partial_rec_async) } } +/* Use OOB+large send to trigger copy mode due to memory pressure. + * OOB causes a short read. 
+ */ +TEST_F(tls_err, oob_pressure) +{ + char buf[1<<16]; + int i; + + memrnd(buf, sizeof(buf)); + + EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); + EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf)); + for (i = 0; i < 64; i++) + EXPECT_EQ(send(self->fd2, buf, 5, MSG_OOB), 5); +} + +/* + * Parse a stream of TLS records and ensure that each record respects + * the specified @max_payload_len. + */ +static size_t parse_tls_records(struct __test_metadata *_metadata, + const __u8 *rx_buf, int rx_len, int overhead, + __u16 max_payload_len) +{ + const __u8 *rec = rx_buf; + size_t total_plaintext_rx = 0; + const __u8 rec_header_len = 5; + + while (rec < rx_buf + rx_len) { + __u16 record_payload_len; + __u16 plaintext_len; + + /* Sanity check that it's a TLS header for application data */ + ASSERT_EQ(rec[0], 23); + ASSERT_EQ(rec[1], 0x3); + ASSERT_EQ(rec[2], 0x3); + + memcpy(&record_payload_len, rec + 3, 2); + record_payload_len = ntohs(record_payload_len); + ASSERT_GE(record_payload_len, overhead); + + plaintext_len = record_payload_len - overhead; + total_plaintext_rx += plaintext_len; + + /* Plaintext must not exceed the specified limit */ + ASSERT_LE(plaintext_len, max_payload_len); + rec += rec_header_len + record_payload_len; + } + + return total_plaintext_rx; +} + +TEST(tls_12_tx_max_payload_len) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 limit = 128; + __u16 opt = 0; + unsigned int optlen = sizeof(opt); + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, 0); + + ret = getsockopt(cfd, SOL_TLS, 
TLS_TX_MAX_PAYLOAD_LEN, &opt, &optlen); + EXPECT_EQ(ret, 0); + EXPECT_EQ(limit, opt); + EXPECT_EQ(optlen, sizeof(limit)); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, sizeof(tx), 0), sizeof(tx)); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + limit); + + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + +TEST(tls_12_tx_max_payload_len_open_rec) +{ + struct tls_crypto_info_keys tls12; + int cfd, ret, fd, overhead; + size_t total_plaintext_rx = 0; + __u8 tx[1024], rx[2000]; + __u16 tx_partial = 256; + __u16 og_limit = 512, limit = 128; + bool notls; + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_CCM_128, + &tls12, 0); + + ulp_sock_pair(_metadata, &fd, &cfd, &notls); + + if (notls) + exit(KSFT_SKIP); + + /* Don't install keys on fd, we'll parse raw records */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX, &tls12, tls12.len); + ASSERT_EQ(ret, 0); + + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &og_limit, + sizeof(og_limit)); + ASSERT_EQ(ret, 0); + + memset(tx, 0, sizeof(tx)); + ASSERT_EQ(send(cfd, tx, tx_partial, MSG_MORE), tx_partial); + + /* + * Changing the payload limit with a pending open record should + * not be allowed. 
+ */ + ret = setsockopt(cfd, SOL_TLS, TLS_TX_MAX_PAYLOAD_LEN, &limit, + sizeof(limit)); + ASSERT_EQ(ret, -1); + ASSERT_EQ(errno, EBUSY); + + ASSERT_EQ(send(cfd, tx + tx_partial, sizeof(tx) - tx_partial, MSG_EOR), + sizeof(tx) - tx_partial); + close(cfd); + + ret = recv(fd, rx, sizeof(rx), 0); + + /* + * 16B tag + 8B IV -- record header (5B) is not counted but we'll + * need it to walk the record stream + */ + overhead = 16 + 8; + total_plaintext_rx = parse_tls_records(_metadata, rx, ret, overhead, + og_limit); + ASSERT_EQ(total_plaintext_rx, sizeof(tx)); + close(fd); +} + TEST(non_established) { struct tls12_crypto_info_aes_gcm_256 tls12; struct sockaddr_in addr; @@ -2099,7 +3131,7 @@ TEST(tls_v6ops) { int sfd, ret, fd; socklen_t len, len2; - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0); addr.sin6_family = AF_INET6; addr.sin6_addr = in6addr_any; @@ -2158,7 +3190,7 @@ TEST(prequeue) { len = sizeof(addr); memrnd(buf, sizeof(buf)); - tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12); + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0); addr.sin_family = AF_INET; addr.sin_addr.s_addr = htonl(INADDR_ANY); @@ -2193,6 +3225,67 @@ TEST(prequeue) { close(cfd); } +TEST(data_steal) { + struct tls_crypto_info_keys tls; + char buf[20000], buf2[20000]; + struct sockaddr_in addr; + int sfd, cfd, ret, fd; + int pid, status; + socklen_t len; + + len = sizeof(addr); + memrnd(buf, sizeof(buf)); + + tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls, 0); + + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = 0; + + fd = socket(AF_INET, SOCK_STREAM, 0); + sfd = socket(AF_INET, SOCK_STREAM, 0); + + ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0); + ASSERT_EQ(listen(sfd, 10), 0); + ASSERT_EQ(getsockname(sfd, &addr, &len), 0); + ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0); + ASSERT_GE(cfd = 
accept(sfd, &addr, &len), 0); + close(sfd); + + ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")); + if (ret) { + ASSERT_EQ(errno, ENOENT); + SKIP(return, "no TLS support"); + } + ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0); + + /* Spawn a child and get it into the read wait path of the underlying + * TCP socket. + */ + pid = fork(); + ASSERT_GE(pid, 0); + if (!pid) { + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), + sizeof(buf) / 2); + exit(!__test_passed(_metadata)); + } + + usleep(10000); + ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); + ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); + + EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); + EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); + /* Don't check errno, the error will be different depending + * on what random bytes TLS interpreted as the record length. + */ + + close(fd); + close(cfd); +} + static void __attribute__((constructor)) fips_check(void) { int res; FILE *f; diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c deleted file mode 100644 index 9ba03164d73a..000000000000 --- a/tools/testing/selftests/net/toeplitz.c +++ /dev/null @@ -1,589 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Toeplitz test - * - * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3 - * 2. Compute the rx_hash in software based on the packet contents - * 3. Compare the two - * - * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given. - * - * If '-C $rx_irq_cpu_list' is given, also - * - * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU - * 5. Compute the rxqueue that RSS would select based on this rx_hash - * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq - * 7. 
Compare the cpus from 4 and 6 - * - * Else if '-r $rps_bitmap' is given, also - * - * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU - * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap - * 6. Compare the cpus from 4 and 5 - */ - -#define _GNU_SOURCE - -#include <arpa/inet.h> -#include <errno.h> -#include <error.h> -#include <fcntl.h> -#include <getopt.h> -#include <linux/filter.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <net/if.h> -#include <netdb.h> -#include <netinet/ip.h> -#include <netinet/ip6.h> -#include <netinet/tcp.h> -#include <netinet/udp.h> -#include <poll.h> -#include <stdbool.h> -#include <stddef.h> -#include <stdint.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <sys/socket.h> -#include <sys/stat.h> -#include <sys/sysinfo.h> -#include <sys/time.h> -#include <sys/types.h> -#include <unistd.h> - -#include "../kselftest.h" - -#define TOEPLITZ_KEY_MIN_LEN 40 -#define TOEPLITZ_KEY_MAX_LEN 60 - -#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */ -#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN) -#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN) - -#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2)) - -#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */ - -#define RPS_MAX_CPUS 16UL /* must be a power of 2 */ - -/* configuration options (cmdline arguments) */ -static uint16_t cfg_dport = 8000; -static int cfg_family = AF_INET6; -static char *cfg_ifname = "eth0"; -static int cfg_num_queues; -static int cfg_num_rps_cpus; -static bool cfg_sink; -static int cfg_type = SOCK_STREAM; -static int cfg_timeout_msec = 1000; -static bool cfg_verbose; - -/* global vars */ -static int num_cpus; -static int ring_block_nr; -static int ring_block_sz; - -/* stats */ -static int frames_received; -static int frames_nohash; -static 
int frames_error; - -#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0) - -/* tpacket ring */ -struct ring_state { - int fd; - char *mmap; - int idx; - int cpu; -}; - -static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */ -static int rps_silo_to_cpu[RPS_MAX_CPUS]; -static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN]; -static struct ring_state rings[RSS_MAX_CPUS]; - -static inline uint32_t toeplitz(const unsigned char *four_tuple, - const unsigned char *key) -{ - int i, bit, ret = 0; - uint32_t key32; - - key32 = ntohl(*((uint32_t *)key)); - key += 4; - - for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) { - for (bit = 7; bit >= 0; bit--) { - if (four_tuple[i] & (1 << bit)) - ret ^= key32; - - key32 <<= 1; - key32 |= !!(key[0] & (1 << bit)); - } - key++; - } - - return ret; -} - -/* Compare computed cpu with arrival cpu from packet_fanout_cpu */ -static void verify_rss(uint32_t rx_hash, int cpu) -{ - int queue = rx_hash % cfg_num_queues; - - log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]); - if (rx_irq_cpus[queue] != cpu) { - log_verbose(". error: rss cpu mismatch (%d)", cpu); - frames_error++; - } -} - -static void verify_rps(uint64_t rx_hash, int cpu) -{ - int silo = (rx_hash * cfg_num_rps_cpus) >> 32; - - log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]); - if (rps_silo_to_cpu[silo] != cpu) { - log_verbose(". 
error: rps cpu mismatch (%d)", cpu); - frames_error++; - } -} - -static void log_rxhash(int cpu, uint32_t rx_hash, - const char *addrs, int addr_len) -{ - char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN]; - uint16_t *ports; - - if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) || - !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr))) - error(1, 0, "address parse error"); - - ports = (void *)addrs + (addr_len * 2); - log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]", - cpu, rx_hash, saddr, daddr, - ntohs(ports[0]), ntohs(ports[1])); -} - -/* Compare computed rxhash with rxhash received from tpacket_v3 */ -static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu) -{ - unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0}; - uint32_t rx_hash_sw; - const char *addrs; - int addr_len; - - if (cfg_family == AF_INET) { - addr_len = sizeof(struct in_addr); - addrs = pkt + offsetof(struct iphdr, saddr); - } else { - addr_len = sizeof(struct in6_addr); - addrs = pkt + offsetof(struct ip6_hdr, ip6_src); - } - - memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2)); - rx_hash_sw = toeplitz(four_tuple, toeplitz_key); - - if (cfg_verbose) - log_rxhash(cpu, rx_hash, addrs, addr_len); - - if (rx_hash != rx_hash_sw) { - log_verbose(" != expected 0x%x\n", rx_hash_sw); - frames_error++; - return; - } - - log_verbose(" OK"); - if (cfg_num_queues) - verify_rss(rx_hash, cpu); - else if (cfg_num_rps_cpus) - verify_rps(rx_hash, cpu); - log_verbose("\n"); -} - -static char *recv_frame(const struct ring_state *ring, char *frame) -{ - struct tpacket3_hdr *hdr = (void *)frame; - - if (hdr->hv1.tp_rxhash) - verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash, - ring->cpu); - else - frames_nohash++; - - return frame + hdr->tp_next_offset; -} - -/* A single TPACKET_V3 block can hold multiple frames */ -static bool recv_block(struct ring_state *ring) -{ - struct tpacket_block_desc *block; - char *frame; - int i; - - block 
= (void *)(ring->mmap + ring->idx * ring_block_sz); - if (!(block->hdr.bh1.block_status & TP_STATUS_USER)) - return false; - - frame = (char *)block; - frame += block->hdr.bh1.offset_to_first_pkt; - - for (i = 0; i < block->hdr.bh1.num_pkts; i++) { - frame = recv_frame(ring, frame); - frames_received++; - } - - block->hdr.bh1.block_status = TP_STATUS_KERNEL; - ring->idx = (ring->idx + 1) % ring_block_nr; - - return true; -} - -/* simple test: sleep once unconditionally and then process all rings */ -static void process_rings(void) -{ - int i; - - usleep(1000 * cfg_timeout_msec); - - for (i = 0; i < num_cpus; i++) - do {} while (recv_block(&rings[i])); - - fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n", - frames_received - frames_nohash - frames_error, - frames_nohash, frames_error); -} - -static char *setup_ring(int fd) -{ - struct tpacket_req3 req3 = {0}; - void *ring; - - req3.tp_retire_blk_tov = cfg_timeout_msec / 8; - req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH; - - req3.tp_frame_size = 2048; - req3.tp_frame_nr = 1 << 10; - req3.tp_block_nr = 16; - - req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr; - req3.tp_block_size /= req3.tp_block_nr; - - if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3))) - error(1, errno, "setsockopt PACKET_RX_RING"); - - ring_block_sz = req3.tp_block_size; - ring_block_nr = req3.tp_block_nr; - - ring = mmap(0, req3.tp_block_size * req3.tp_block_nr, - PROT_READ | PROT_WRITE, - MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0); - if (ring == MAP_FAILED) - error(1, 0, "mmap failed"); - - return ring; -} - -static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4), - BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto), - BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2), - BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport), - 
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0), - BPF_STMT(BPF_RET + BPF_K, 0), - BPF_STMT(BPF_RET + BPF_K, 0xFFFF), - }; - struct sock_fprog prog = {}; - - prog.filter = filter; - prog.len = ARRAY_SIZE(filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) - error(1, errno, "setsockopt filter"); -} - -/* filter on transport protocol and destination port */ -static void set_filter(int fd) -{ - const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */ - uint8_t proto; - - proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP; - if (cfg_family == AF_INET) - __set_filter(fd, offsetof(struct iphdr, protocol), proto, - sizeof(struct iphdr) + off_dport); - else - __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto, - sizeof(struct ip6_hdr) + off_dport); -} - -/* drop everything: used temporarily during setup */ -static void set_filter_null(int fd) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_RET + BPF_K, 0), - }; - struct sock_fprog prog = {}; - - prog.filter = filter; - prog.len = ARRAY_SIZE(filter); - if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog))) - error(1, errno, "setsockopt filter"); -} - -static int create_ring(char **ring) -{ - struct fanout_args args = { - .id = 1, - .type_flags = PACKET_FANOUT_CPU, - .max_num_members = RSS_MAX_CPUS - }; - struct sockaddr_ll ll = { 0 }; - int fd, val; - - fd = socket(PF_PACKET, SOCK_DGRAM, 0); - if (fd == -1) - error(1, errno, "socket creation failed"); - - val = TPACKET_V3; - if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val))) - error(1, errno, "setsockopt PACKET_VERSION"); - *ring = setup_ring(fd); - - /* block packets until all rings are added to the fanout group: - * else packets can arrive during setup and get misclassified - */ - set_filter_null(fd); - - ll.sll_family = AF_PACKET; - ll.sll_ifindex = if_nametoindex(cfg_ifname); - ll.sll_protocol = cfg_family == AF_INET ? 
htons(ETH_P_IP) : - htons(ETH_P_IPV6); - if (bind(fd, (void *)&ll, sizeof(ll))) - error(1, errno, "bind"); - - /* must come after bind: verifies all programs in group match */ - if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) { - /* on failure, retry using old API if that is sufficient: - * it has a hard limit of 256 sockets, so only try if - * (a) only testing rxhash, not RSS or (b) <= 256 cpus. - * in this API, the third argument is left implicit. - */ - if (cfg_num_queues || num_cpus > 256 || - setsockopt(fd, SOL_PACKET, PACKET_FANOUT, - &args, sizeof(uint32_t))) - error(1, errno, "setsockopt PACKET_FANOUT cpu"); - } - - return fd; -} - -/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */ -static int setup_sink(void) -{ - int fd, val; - - fd = socket(cfg_family, cfg_type, 0); - if (fd == -1) - error(1, errno, "socket %d.%d", cfg_family, cfg_type); - - val = 1 << 20; - if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val))) - error(1, errno, "setsockopt rcvbuf"); - - return fd; -} - -static void setup_rings(void) -{ - int i; - - for (i = 0; i < num_cpus; i++) { - rings[i].cpu = i; - rings[i].fd = create_ring(&rings[i].mmap); - } - - /* accept packets once all rings in the fanout group are up */ - for (i = 0; i < num_cpus; i++) - set_filter(rings[i].fd); -} - -static void cleanup_rings(void) -{ - int i; - - for (i = 0; i < num_cpus; i++) { - if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz)) - error(1, errno, "munmap"); - if (close(rings[i].fd)) - error(1, errno, "close"); - } -} - -static void parse_cpulist(const char *arg) -{ - do { - rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10); - - arg = strchr(arg, ','); - if (!arg) - break; - arg++; // skip ',' - } while (1); -} - -static void show_cpulist(void) -{ - int i; - - for (i = 0; i < cfg_num_queues; i++) - fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]); -} - -static void show_silos(void) -{ - int i; - - for (i = 0; i < cfg_num_rps_cpus; i++) - 
fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]); -} - -static void parse_toeplitz_key(const char *str, int slen, unsigned char *key) -{ - int i, ret, off; - - if (slen < TOEPLITZ_STR_MIN_LEN || - slen > TOEPLITZ_STR_MAX_LEN + 1) - error(1, 0, "invalid toeplitz key"); - - for (i = 0, off = 0; off < slen; i++, off += 3) { - ret = sscanf(str + off, "%hhx", &key[i]); - if (ret != 1) - error(1, 0, "key parse error at %d off %d len %d", - i, off, slen); - } -} - -static void parse_rps_bitmap(const char *arg) -{ - unsigned long bitmap; - int i; - - bitmap = strtoul(arg, NULL, 0); - - if (bitmap & ~(RPS_MAX_CPUS - 1)) - error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu", - bitmap, RPS_MAX_CPUS - 1); - - for (i = 0; i < RPS_MAX_CPUS; i++) - if (bitmap & 1UL << i) - rps_silo_to_cpu[cfg_num_rps_cpus++] = i; -} - -static void parse_opts(int argc, char **argv) -{ - static struct option long_options[] = { - {"dport", required_argument, 0, 'd'}, - {"cpus", required_argument, 0, 'C'}, - {"key", required_argument, 0, 'k'}, - {"iface", required_argument, 0, 'i'}, - {"ipv4", no_argument, 0, '4'}, - {"ipv6", no_argument, 0, '6'}, - {"sink", no_argument, 0, 's'}, - {"tcp", no_argument, 0, 't'}, - {"timeout", required_argument, 0, 'T'}, - {"udp", no_argument, 0, 'u'}, - {"verbose", no_argument, 0, 'v'}, - {"rps", required_argument, 0, 'r'}, - {0, 0, 0, 0} - }; - bool have_toeplitz = false; - int index, c; - - while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) { - switch (c) { - case '4': - cfg_family = AF_INET; - break; - case '6': - cfg_family = AF_INET6; - break; - case 'C': - parse_cpulist(optarg); - break; - case 'd': - cfg_dport = strtol(optarg, NULL, 0); - break; - case 'i': - cfg_ifname = optarg; - break; - case 'k': - parse_toeplitz_key(optarg, strlen(optarg), - toeplitz_key); - have_toeplitz = true; - break; - case 'r': - parse_rps_bitmap(optarg); - break; - case 's': - cfg_sink = true; - break; - case 't': - cfg_type = 
SOCK_STREAM; - break; - case 'T': - cfg_timeout_msec = strtol(optarg, NULL, 0); - break; - case 'u': - cfg_type = SOCK_DGRAM; - break; - case 'v': - cfg_verbose = true; - break; - - default: - error(1, 0, "unknown option %c", optopt); - break; - } - } - - if (!have_toeplitz) - error(1, 0, "Must supply rss key ('-k')"); - - num_cpus = get_nprocs(); - if (num_cpus > RSS_MAX_CPUS) - error(1, 0, "increase RSS_MAX_CPUS"); - - if (cfg_num_queues && cfg_num_rps_cpus) - error(1, 0, - "Can't supply both RSS cpus ('-C') and RPS map ('-r')"); - if (cfg_verbose) { - show_cpulist(); - show_silos(); - } -} - -int main(int argc, char **argv) -{ - const int min_tests = 10; - int fd_sink = -1; - - parse_opts(argc, argv); - - if (cfg_sink) - fd_sink = setup_sink(); - - setup_rings(); - process_rings(); - cleanup_rings(); - - if (cfg_sink && close(fd_sink)) - error(1, errno, "close sink"); - - if (frames_received - frames_nohash < min_tests) - error(1, 0, "too few frames for verification"); - - return frames_error; -} diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh deleted file mode 100755 index 8ff172f7bb1b..000000000000 --- a/tools/testing/selftests/net/toeplitz.sh +++ /dev/null @@ -1,199 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping -# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu -# ('-rps <rps_map>') -# -# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action, -# which is a driver-specific encoding. 
-# -# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \ -# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)] - -source setup_loopback.sh -readonly SERVER_IP4="192.168.1.200/24" -readonly SERVER_IP6="fda8::1/64" -readonly SERVER_MAC="aa:00:00:00:00:02" - -readonly CLIENT_IP4="192.168.1.100/24" -readonly CLIENT_IP6="fda8::2/64" -readonly CLIENT_MAC="aa:00:00:00:00:01" - -PORT=8000 -KEY="$(</proc/sys/net/core/netdev_rss_key)" -TEST_RSS=false -RPS_MAP="" -PROTO_FLAG="" -IP_FLAG="" -DEV="eth0" - -# Return the number of rxqs among which RSS is configured to spread packets. -# This is determined by reading the RSS indirection table using ethtool. -get_rss_cfg_num_rxqs() { - echo $(ethtool -x "${DEV}" | - grep -E [[:space:]]+[0-9]+:[[:space:]]+ | - cut -d: -f2- | - awk '{$1=$1};1' | - tr ' ' '\n' | - sort -u | - wc -l) -} - -# Return a list of the receive irq handler cpus. -# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc. -# Reads /sys/kernel/irq/ in order, so algorithm depends on -# irq_{rxq-0} < irq_{rxq-1}, etc. -get_rx_irq_cpus() { - CPUS="" - # sort so that irq 2 is read before irq 10 - SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V) - # Consider only as many queues as RSS actually uses. We assume that - # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1). 
- RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs) - RXQ_COUNT=0 - - for i in ${SORTED_IRQS} - do - [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break - # lookup relevant IRQs by action name - [[ -e "$i/actions" ]] || continue - cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue - irqname=$(<"$i/actions") - - # does the IRQ get called - irqcount=$(cat "$i/per_cpu_count" | tr -d '0,') - [[ -n "${irqcount}" ]] || continue - - # lookup CPU - irq=$(basename "$i") - cpu=$(cat "/proc/irq/$irq/smp_affinity_list") - - if [[ -z "${CPUS}" ]]; then - CPUS="${cpu}" - else - CPUS="${CPUS},${cpu}" - fi - RXQ_COUNT=$((RXQ_COUNT+1)) - done - - echo "${CPUS}" -} - -get_disable_rfs_cmd() { - echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;" -} - -get_set_rps_bitmaps_cmd() { - CMD="" - for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus - do - CMD="${CMD} echo $1 > ${i};" - done - - echo "${CMD}" -} - -get_disable_rps_cmd() { - echo "$(get_set_rps_bitmaps_cmd 0)" -} - -die() { - echo "$1" - exit 1 -} - -check_nic_rxhash_enabled() { - local -r pattern="receive-hashing:\ on" - - ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled" -} - -parse_opts() { - local prog=$0 - shift 1 - - while [[ "$1" =~ "-" ]]; do - if [[ "$1" = "-irq_prefix" ]]; then - shift - IRQ_PATTERN="^$1-[0-9]*$" - elif [[ "$1" = "-u" || "$1" = "-t" ]]; then - PROTO_FLAG="$1" - elif [[ "$1" = "-4" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP4}" - CLIENT_IP="${CLIENT_IP4}" - elif [[ "$1" = "-6" ]]; then - IP_FLAG="$1" - SERVER_IP="${SERVER_IP6}" - CLIENT_IP="${CLIENT_IP6}" - elif [[ "$1" = "-rss" ]]; then - TEST_RSS=true - elif [[ "$1" = "-rps" ]]; then - shift - RPS_MAP="$1" - elif [[ "$1" = "-i" ]]; then - shift - DEV="$1" - else - die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \ - [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]" - fi - shift - done -} - -setup() { - setup_loopback_environment "${DEV}" - - # Set up server_ns namespace and client_ns namespace - 
setup_macvlan_ns "${DEV}" $server_ns server \ - "${SERVER_MAC}" "${SERVER_IP}" - setup_macvlan_ns "${DEV}" $client_ns client \ - "${CLIENT_MAC}" "${CLIENT_IP}" -} - -cleanup() { - cleanup_macvlan_ns $server_ns server $client_ns client - cleanup_loopback "${DEV}" -} - -parse_opts $0 $@ - -setup -trap cleanup EXIT - -check_nic_rxhash_enabled - -# Actual test starts here -if [[ "${TEST_RSS}" = true ]]; then - # RPS/RFS must be disabled because they move packets between cpus, - # which breaks the PACKET_FANOUT_CPU identification of RSS decisions. - eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -C "$(get_rx_irq_cpus)" -s -v & -elif [[ ! -z "${RPS_MAP}" ]]; then - eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \ - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \ - -r "0x${RPS_MAP}" -s -v & -else - ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \ - -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v & -fi - -server_pid=$! - -ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \ - "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" & - -client_pid=$! - -wait "${server_pid}" -exit_code=$? -kill -9 "${client_pid}" -if [[ "${exit_code}" -eq 0 ]]; then - echo "Test Succeeded!" -fi -exit "${exit_code}" diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh deleted file mode 100755 index 2fef34f4aba1..000000000000 --- a/tools/testing/selftests/net/toeplitz_client.sh +++ /dev/null @@ -1,28 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# A simple program for generating traffic for the toeplitz test. -# -# This program sends packets periodically for, conservatively, 20 seconds. 
The -# intent is for the calling program to kill this program once it is no longer -# needed, rather than waiting for the 20 second expiration. - -send_traffic() { - expiration=$((SECONDS+20)) - while [[ "${SECONDS}" -lt "${expiration}" ]] - do - if [[ "${PROTO}" == "-u" ]]; then - echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}" - else - echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}" - fi - sleep 0.001 - done -} - -PROTO=$1 -IPVER=$2 -ADDR=$3 -PORT=$4 - -send_traffic diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh index 282f14760940..a7c6ab8a0347 100755 --- a/tools/testing/selftests/net/traceroute.sh +++ b/tools/testing/selftests/net/traceroute.sh @@ -10,28 +10,6 @@ PAUSE_ON_FAIL=no ################################################################################ # -log_test() -{ - local rc=$1 - local expected=$2 - local msg="$3" - - if [ ${rc} -eq ${expected} ]; then - printf "TEST: %-60s [ OK ]\n" "${msg}" - nsuccess=$((nsuccess+1)) - else - ret=1 - nfail=$((nfail+1)) - printf "TEST: %-60s [FAIL]\n" "${msg}" - if [ "${PAUSE_ON_FAIL}" = "yes" ]; then - echo - echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 - fi - fi -} - run_cmd() { local ns @@ -58,6 +36,35 @@ run_cmd() return $rc } +__check_traceroute_version() +{ + local cmd=$1; shift + local req_ver=$1; shift + local ver + + req_ver=$(echo "$req_ver" | sed 's/\.//g') + ver=$($cmd -V 2>&1 | grep -Eo '[0-9]+.[0-9]+.[0-9]+' | sed 's/\.//g') + if [[ $ver -lt $req_ver ]]; then + return 1 + else + return 0 + fi +} + +check_traceroute6_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute6 "$req_ver" +} + +check_traceroute_version() +{ + local req_ver=$1; shift + + __check_traceroute_version traceroute "$req_ver" +} + ################################################################################ # create namespaces and interconnects @@ -81,6 +88,8 @@ create_ns() ip netns exec ${ns} ip -6 ro 
add unreachable default metric 8192 ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1 + ip netns exec ${ns} sysctl -qw net.ipv4.icmp_ratelimit=0 + ip netns exec ${ns} sysctl -qw net.ipv6.icmp.ratelimit=0 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1 ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1 @@ -203,34 +212,275 @@ setup_traceroute6() run_traceroute6() { - if [ ! -x "$(command -v traceroute6)" ]; then - echo "SKIP: Could not run IPV6 test without traceroute6" - return - fi - setup_traceroute6 + RET=0 + # traceroute6 host-2 from host-1 (expects 2000:102::2) run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2" - log_test $? 0 "IPV6 traceroute" + check_err $? "traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute" cleanup_traceroute6 } ################################################################################ +# traceroute6 with VRF test +# +# Verify that in this scenario +# +# ------------------------ N2 +# | | +# ------ ------ N3 ---- +# | R1 | | R2 |------|H2| +# ------ ------ ---- +# | | +# ------------------------ N1 +# | +# ---- +# |H1| +# ---- +# +# Where H1's default route goes through R1 and R1's default route goes through +# R2 over N2, traceroute6 from H1 to H2 reports R2's address on N2 and not N1. +# The interfaces connecting R2 to the different subnets are members of a VRF +# and the intention is to check that traceroute6 does not report the VRF's +# address. 
+# +# Addresses are assigned as follows: +# +# N1: 2000:101::/64 +# N2: 2000:102::/64 +# N3: 2000:103::/64 +# +# R1's host part of address: 1 +# R2's host part of address: 2 +# H1's host part of address: 3 +# H2's host part of address: 4 +# +# For example: +# the IPv6 address of R1's interface on N2 is 2000:102::1/64 + +cleanup_traceroute6_vrf() +{ + cleanup_all_ns +} + +setup_traceroute6_vrf() +{ + # Start clean + cleanup_traceroute6_vrf + + setup_ns h1 h2 r1 r2 + create_ns "$h1" + create_ns "$h2" + create_ns "$r1" + create_ns "$r2" + + ip -n "$r2" link add name vrf100 up type vrf table 100 + ip -n "$r2" addr add 2001:db8:100::1/64 dev vrf100 + + # Setup N3 + connect_ns "$r2" eth3 - 2000:103::2/64 "$h2" eth3 - 2000:103::4/64 + + ip -n "$r2" link set dev eth3 master vrf100 + + ip -n "$h2" route add default via 2000:103::2 + + # Setup N2 + connect_ns "$r1" eth2 - 2000:102::1/64 "$r2" eth2 - 2000:102::2/64 + + ip -n "$r1" route add default via 2000:102::2 + + ip -n "$r2" link set dev eth2 master vrf100 + + # Setup N1. host-1 and router-2 connect to a bridge in router-1. + ip -n "$r1" link add name br100 up type bridge + ip -n "$r1" addr add 2000:101::1/64 dev br100 + + connect_ns "$h1" eth0 - 2000:101::3/64 "$r1" eth0 - - + + ip -n "$h1" route add default via 2000:101::1 + + ip -n "$r1" link set dev eth0 master br100 + + connect_ns "$r2" eth1 - 2000:101::2/64 "$r1" eth1 - - + + ip -n "$r2" link set dev eth1 master vrf100 + + ip -n "$r1" link set dev eth1 master br100 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2000:103::4 >/dev/null 2>&1 +} + +run_traceroute6_vrf() +{ + setup_traceroute6_vrf + + RET=0 + + # traceroute6 host-2 from host-1 (expects 2000:102::2) + run_cmd "$h1" "traceroute6 2000:103::4 | grep 2000:102::2" + check_err $? 
"traceroute6 did not return 2000:102::2" + log_test "IPv6 traceroute with VRF" + + cleanup_traceroute6_vrf +} + +################################################################################ +# traceroute6 with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute6_ext() +{ + cleanup_all_ns +} + +setup_traceroute6_ext() +{ + # Start clean + cleanup_traceroute6_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 2001:db8:1::1/128 dev lo + ip -n "$h1" route add ::/0 nexthop via fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 2001:db8:1::2/128 dev lo + ip -n "$r1" route add 2001:db8:1::1/128 nexthop via fe80::1 dev eth1 + ip -n "$r1" route add 2001:db8:1::3/128 nexthop via fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 2001:db8:1::3/128 dev lo + ip -n "$h2" route add ::/0 nexthop via fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping6 -c5 2001:db8:1::3 >/dev/null 2>&1 +} + +traceroute6_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? 
"Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_err $? "Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 2001:db8:100::1/64 dev eth1 nodad" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,2001:db8:100::1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 2001:db8:100::1/64 dev eth1" + + # Change name and MTU and make sure the result is still correct. + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute6 -e 2001:db8:1::3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? 
"Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv6.icmp.errors_extension_mask=0x00" +} + +run_traceroute6_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute6_version 2.1.5; then + log_test_skip "traceroute6 too old, missing ICMP extensions support" + return + fi + + setup_traceroute6_ext + + RET=0 + + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv6.icmp.errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv6/icmp/errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. + traceroute6_ext_iio_iif_test + traceroute6_ext_iio_iif_test 127 + traceroute6_ext_iio_iif_test 128 + traceroute6_ext_iio_iif_test 129 + + log_test "IPv6 traceroute with ICMP extensions" + + cleanup_traceroute6_ext +} + +################################################################################ # traceroute test # -# Verify that traceroute from H1 to H2 shows 1.0.1.1 in this scenario +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. 
# -# 1.0.3.1/24 +# 1.0.3.3/24 1.0.3.1/24 # ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- # |H1|--------------------------|R1|--------------------------|H2| # ---- N1 ---- N2 ---- # -# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and -# 1.0.3.1/24 and 1.0.1.1/24 are respectively R1's primary and secondary -# address on N1. -# +# where net.ipv4.icmp_errors_use_inbound_ifaddr is set on R1 and 1.0.3.1/24 and +# 1.0.1.1/24 are R1's primary addresses on N1. The kernel is expected to prefer +# a source address that is on the same subnet as the destination IP of the ICMP +# error message. cleanup_traceroute() { @@ -250,6 +500,7 @@ setup_traceroute() connect_ns $h1 eth0 1.0.1.3/24 - \ $router eth1 1.0.3.1/24 - + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 ip netns exec $h1 ip route add default via 1.0.1.1 ip netns exec $router ip addr add 1.0.1.1/24 dev eth1 @@ -268,18 +519,232 @@ setup_traceroute() run_traceroute() { - if [ ! -x "$(command -v traceroute)" ]; then - echo "SKIP: Could not run IPV4 test without traceroute" + setup_traceroute + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep -q 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep -q 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute" + + cleanup_traceroute +} + +################################################################################ +# traceroute with VRF test +# +# Verify that traceroute from H1 to H2 shows 1.0.3.1 and 1.0.1.1 when +# traceroute uses 1.0.3.3 and 1.0.1.3 as the source IP, respectively. The +# intention is to check that the kernel does not choose an IP assigned to the +# VRF device, but rather an address from the VRF port (eth1) that received the +# packet that generates the ICMP error message. 
+# +# 1.0.4.1/24 (vrf100) +# 1.0.3.3/24 1.0.3.1/24 +# ---- 1.0.1.3/24 1.0.1.1/24 ---- 1.0.2.1/24 1.0.2.4/24 ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- + +cleanup_traceroute_vrf() +{ + cleanup_all_ns +} + +setup_traceroute_vrf() +{ + # Start clean + cleanup_traceroute_vrf + + setup_ns h1 h2 router + create_ns "$h1" + create_ns "$h2" + create_ns "$router" + + ip -n "$router" link add name vrf100 up type vrf table 100 + ip -n "$router" addr add 1.0.4.1/24 dev vrf100 + + connect_ns "$h1" eth0 1.0.1.3/24 - \ + "$router" eth1 1.0.1.1/24 - + + ip -n "$h1" addr add 1.0.3.3/24 dev eth0 + ip -n "$h1" route add default via 1.0.1.1 + + ip -n "$router" link set dev eth1 master vrf100 + ip -n "$router" addr add 1.0.3.1/24 dev eth1 + ip netns exec "$router" sysctl -qw \ + net.ipv4.icmp_errors_use_inbound_ifaddr=1 + + connect_ns "$h2" eth0 1.0.2.4/24 - \ + "$router" eth2 1.0.2.1/24 - + + ip -n "$h2" route add default via 1.0.2.1 + + ip -n "$router" link set dev eth2 master vrf100 + + # Prime the network + ip netns exec "$h1" ping -c5 1.0.2.4 >/dev/null 2>&1 +} + +run_traceroute_vrf() +{ + setup_traceroute_vrf + + RET=0 + + # traceroute host-2 from host-1. Expect a source IP that is on the same + # subnet as destination IP of the ICMP error message. + run_cmd "$h1" "traceroute -s 1.0.1.3 1.0.2.4 | grep 1.0.1.1" + check_err $? "traceroute did not return 1.0.1.1" + run_cmd "$h1" "traceroute -s 1.0.3.3 1.0.2.4 | grep 1.0.3.1" + check_err $? "traceroute did not return 1.0.3.1" + log_test "IPv4 traceroute with VRF" + + cleanup_traceroute_vrf +} + +################################################################################ +# traceroute with ICMP extensions test +# +# Verify that in this scenario +# +# ---- ---- ---- +# |H1|--------------------------|R1|--------------------------|H2| +# ---- N1 ---- N2 ---- +# +# ICMP extensions are correctly reported. 
The loopback interfaces on all the +# nodes are assigned global addresses and the interfaces connecting the nodes +# are assigned IPv6 link-local addresses. + +cleanup_traceroute_ext() +{ + cleanup_all_ns +} + +setup_traceroute_ext() +{ + # Start clean + cleanup_traceroute_ext + + setup_ns h1 r1 h2 + create_ns "$h1" + create_ns "$r1" + create_ns "$h2" + + # Setup N1 + connect_ns "$h1" eth1 - fe80::1/64 "$r1" eth1 - fe80::2/64 + # Setup N2 + connect_ns "$r1" eth2 - fe80::3/64 "$h2" eth2 - fe80::4/64 + + # Setup H1 + ip -n "$h1" address add 192.0.2.1/32 dev lo + ip -n "$h1" route add 0.0.0.0/0 nexthop via inet6 fe80::2 dev eth1 + + # Setup R1 + ip -n "$r1" address add 192.0.2.2/32 dev lo + ip -n "$r1" route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1 + ip -n "$r1" route add 192.0.2.3/32 nexthop via inet6 fe80::4 dev eth2 + + # Setup H2 + ip -n "$h2" address add 192.0.2.3/32 dev lo + ip -n "$h2" route add 0.0.0.0/0 nexthop via inet6 fe80::3 dev eth2 + + # Prime the network + ip netns exec "$h1" ping -c5 192.0.2.3 >/dev/null 2>&1 +} + +traceroute_ext_iio_iif_test() +{ + local r1_ifindex h2_ifindex + local pkt_len=$1; shift + + # Test that incoming interface info is not appended by default. + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended by default when should not" + + # Test that the extension is appended when enabled. + run_cmd "$r1" "bash -c \"echo 0x01 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? "Failed to enable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_err $? "Incoming interface info not appended after enable" + + # Test that the extension is not appended when disabled. + run_cmd "$r1" "bash -c \"echo 0x00 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_err $? 
"Failed to disable incoming interface info extension on R1" + + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep INC" + check_fail $? "Incoming interface info appended after disable" + + # Test that the extension is sent correctly from both R1 and H2. + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + r1_ifindex=$(ip -n "$r1" -j link show dev eth1 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1" + + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x01" + h2_ifindex=$(ip -n "$h2" -j link show dev eth2 | jq '.[]["ifindex"]') + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$h2_ifindex,\"eth2\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from H2" + + # Add a global address on the incoming interface of R1 and check that + # it is reported. + run_cmd "$r1" "ip address add 198.51.100.1/24 dev eth1" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,198.51.100.1,\"eth1\",mtu=1500>'" + check_err $? "Wrong incoming interface info reported from R1 after address addition" + run_cmd "$r1" "ip address del 198.51.100.1/24 dev eth1" + + # Change name and MTU and make sure the result is still correct. + # Re-add the route towards H1 since it was deleted when we removed the + # last IPv4 address from eth1 on R1. + run_cmd "$r1" "ip route add 192.0.2.1/32 nexthop via inet6 fe80::1 dev eth1" + run_cmd "$r1" "ip link set dev eth1 name eth1tag mtu 1501" + run_cmd "$h1" "traceroute -e 192.0.2.3 $pkt_len | grep '<INC:$r1_ifindex,\"eth1tag\",mtu=1501>'" + check_err $? 
"Wrong incoming interface info reported from R1 after name and MTU change" + run_cmd "$r1" "ip link set dev eth1tag name eth1 mtu 1500" + + run_cmd "$r1" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" + run_cmd "$h2" "sysctl -w net.ipv4.icmp_errors_extension_mask=0x00" +} + +run_traceroute_ext() +{ + # Need at least version 2.1.5 for RFC 5837 support. + if ! check_traceroute_version 2.1.5; then + log_test_skip "traceroute too old, missing ICMP extensions support" return fi - setup_traceroute + setup_traceroute_ext - # traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while. - run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1" - log_test $? 0 "IPV4 traceroute" + RET=0 - cleanup_traceroute + ## General ICMP extensions tests + + # Test that ICMP extensions are disabled by default. + run_cmd "$h1" "sysctl net.ipv4.icmp_errors_extension_mask | grep \"= 0$\"" + check_err $? "ICMP extensions are not disabled by default" + + # Test that unsupported values are rejected. Do not use "sysctl" as + # older versions do not return an error code upon failure. + run_cmd "$h1" "bash -c \"echo 0x80 > /proc/sys/net/ipv4/icmp_errors_extension_mask\"" + check_fail $? "Unsupported sysctl value was not rejected" + + ## Extension-specific tests + + # Incoming interface info test. Test with various packet sizes, + # including the default one. 
+ traceroute_ext_iio_iif_test + traceroute_ext_iio_iif_test 127 + traceroute_ext_iio_iif_test 128 + traceroute_ext_iio_iif_test 129 + + log_test "IPv4 traceroute with ICMP extensions" + + cleanup_traceroute_ext } ################################################################################ @@ -288,15 +753,16 @@ run_traceroute() run_tests() { run_traceroute6 + run_traceroute6_vrf + run_traceroute6_ext run_traceroute + run_traceroute_vrf + run_traceroute_ext } ################################################################################ # main -declare -i nfail=0 -declare -i nsuccess=0 - while getopts :pv o do case $o in @@ -306,7 +772,10 @@ do esac done +require_command traceroute6 +require_command traceroute +require_command jq + run_tests -printf "\nTests passed: %3d\n" ${nsuccess} -printf "Tests failed: %3d\n" ${nfail} +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c index fa83918b62d1..0efc67b0357a 100644 --- a/tools/testing/selftests/net/tun.c +++ b/tools/testing/selftests/net/tun.c @@ -15,7 +15,7 @@ #include <sys/ioctl.h> #include <sys/socket.h> -#include "../kselftest_harness.h" +#include "kselftest_harness.h" static int tun_attach(int fd, char *dev) { diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index ec60a16c9307..bcc14688661d 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -77,6 +77,8 @@ static bool cfg_epollet; static bool cfg_do_listen; static uint16_t dest_port = 9000; static bool cfg_print_nsec; +static uint32_t ts_opt_id; +static bool cfg_use_cmsg_opt_id; static struct sockaddr_in daddr; static struct sockaddr_in6 daddr6; @@ -136,12 +138,13 @@ static void validate_key(int tskey, int tstype) /* compare key for each subsequent request * must only test for one type, the first one requested */ - if (saved_tskey == -1) + if (saved_tskey == -1 || cfg_use_cmsg_opt_id) saved_tskey_type = 
tstype; else if (saved_tskey_type != tstype) return; stepsize = cfg_proto == SOCK_STREAM ? cfg_payload_len : 1; + stepsize = cfg_use_cmsg_opt_id ? 0 : stepsize; if (tskey != saved_tskey + stepsize) { fprintf(stderr, "ERROR: key %d, expected %d\n", tskey, saved_tskey + stepsize); @@ -214,7 +217,7 @@ static void print_timestamp_usr(void) static void print_timestamp(struct scm_timestamping *tss, int tstype, int tskey, int payload_len) { - const char *tsname; + const char *tsname = NULL; validate_key(tskey, tstype); @@ -356,8 +359,12 @@ static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) } } - if (batch > 1) + if (batch > 1) { fprintf(stderr, "batched %d timestamps\n", batch); + } else if (!batch) { + fprintf(stderr, "Failed to report timestamps\n"); + test_failed = true; + } } static int recv_errmsg(int fd) @@ -480,7 +487,7 @@ static void fill_header_udp(void *p, bool is_ipv4) static void do_test(int family, unsigned int report_opt) { - char control[CMSG_SPACE(sizeof(uint32_t))]; + char control[2 * CMSG_SPACE(sizeof(uint32_t))]; struct sockaddr_ll laddr; unsigned int sock_opt; struct cmsghdr *cmsg; @@ -620,18 +627,32 @@ static void do_test(int family, unsigned int report_opt) msg.msg_iov = &iov; msg.msg_iovlen = 1; - if (cfg_use_cmsg) { + if (cfg_use_cmsg || cfg_use_cmsg_opt_id) { memset(control, 0, sizeof(control)); msg.msg_control = control; - msg.msg_controllen = sizeof(control); + msg.msg_controllen = cfg_use_cmsg * CMSG_SPACE(sizeof(uint32_t)); + msg.msg_controllen += cfg_use_cmsg_opt_id * CMSG_SPACE(sizeof(uint32_t)); + + cmsg = NULL; + if (cfg_use_cmsg) { + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); - cmsg = CMSG_FIRSTHDR(&msg); - cmsg->cmsg_level = SOL_SOCKET; - cmsg->cmsg_type = SO_TIMESTAMPING; - cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + *((uint32_t *)CMSG_DATA(cmsg)) = report_opt; + } + if (cfg_use_cmsg_opt_id) { + cmsg = cmsg ? 
CMSG_NXTHDR(&msg, cmsg) : CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_TS_OPT_ID; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *)CMSG_DATA(cmsg)) = ts_opt_id; + saved_tskey = ts_opt_id; + } - *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; } val = sendmsg(fd, &msg, 0); @@ -681,6 +702,7 @@ static void __attribute__((noreturn)) usage(const char *filepath) " -L listen on hostname and port\n" " -n: set no-payload option\n" " -N: print timestamps and durations in nsec (instead of usec)\n" + " -o N: use SCM_TS_OPT_ID control message to provide N as tskey\n" " -p N: connect to port N\n" " -P: use PF_PACKET\n" " -r: use raw\n" @@ -701,7 +723,7 @@ static void parse_opt(int argc, char **argv) int c; while ((c = getopt(argc, argv, - "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) { + "46bc:CeEFhIl:LnNo:p:PrRS:t:uv:V:x")) != -1) { switch (c) { case '4': do_ipv6 = 0; @@ -742,6 +764,10 @@ static void parse_opt(int argc, char **argv) case 'N': cfg_print_nsec = true; break; + case 'o': + ts_opt_id = strtoul(optarg, NULL, 10); + cfg_use_cmsg_opt_id = true; + break; case 'p': dest_port = strtoul(optarg, NULL, 10); break; @@ -799,6 +825,8 @@ static void parse_opt(int argc, char **argv) error(1, 0, "cannot ask for pktinfo over pf_packet"); if (cfg_busy_poll && cfg_use_epoll) error(1, 0, "pass epoll or busy_poll, not both"); + if (cfg_proto == SOCK_STREAM && cfg_use_cmsg_opt_id) + error(1, 0, "TCP sockets don't support SCM_TS_OPT_ID"); if (optind != argc - 1) error(1, 0, "missing required hostname argument"); diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 25baca4b148e..fe4649bb8786 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -37,11 +37,13 @@ run_test_v4v6() { run_test_tcpudpraw() { local -r args=$@ - run_test_v4v6 ${args} # tcp - run_test_v4v6 ${args} -u # udp - run_test_v4v6 ${args} -r # raw - run_test_v4v6 ${args} -R # 
raw (IPPROTO_RAW) - run_test_v4v6 ${args} -P # pf_packet + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -u -o 42 # udp with fixed tskey + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -r -o 42 # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet } run_test_all() { diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh index 11a1ebda564f..b17e032a6d75 100755 --- a/tools/testing/selftests/net/udpgro.sh +++ b/tools/testing/selftests/net/udpgro.sh @@ -3,12 +3,10 @@ # # Run a series of udpgro functional tests. -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" - # set global exit status, but never reset nonzero one. check_err() { @@ -38,7 +36,7 @@ cfg_veth() { ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24 ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad ip -netns "${PEER_NS}" link set dev veth1 up - ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp + ip netns exec "${PEER_NS}" ethtool -K veth1 gro on } run_one() { @@ -46,17 +44,19 @@ run_one() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} & + local PID1=$! wait_local_port_listen ${PEER_NS} 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -73,6 +73,7 @@ run_one_nat() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 if [[ ${tx_args} = *-4* ]]; then ipt_cmd=iptables @@ -93,16 +94,17 @@ run_one_nat() { # ... 
so that GRO will match the UDP_GRO enabled socket, but packets # will land on the 'plain' one ip netns exec "${PEER_NS}" ./udpgso_bench_rx -G ${family} -b ${addr1} -n 0 & - pid=$! - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${family} -b ${addr2%/*} ${rx_args} && \ - echo "ok" || \ - echo "failed"& + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${family} -b ${addr2%/*} ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - kill -INT $pid - wait $(jobs -p) + check_err $? + kill -INT ${PID1} + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -111,20 +113,26 @@ run_one_2sock() { local -r all="$@" local -r tx_args=${all%rx*} local -r rx_args=${all#*rx} + local ret=0 cfg_veth - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 10 ${rx_args} -p 12345 & - ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 10 ${rx_args} && \ - echo "ok" || \ - echo "failed" & + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 1000 -R 100 ${rx_args} -p 12345 & + local PID1=$! + ip netns exec "${PEER_NS}" ./udpgso_bench_rx -C 2000 -R 100 ${rx_args} & + local PID2=$! wait_local_port_listen "${PEER_NS}" 12345 udp ./udpgso_bench_tx ${tx_args} -p 12345 + check_err $? wait_local_port_listen "${PEER_NS}" 8000 udp ./udpgso_bench_tx ${tx_args} - ret=$? - wait $(jobs -p) + check_err $? + wait ${PID1} + check_err $? + wait ${PID2} + check_err $? + [ "$ret" -eq 0 ] && echo "ok" || echo "failed" return $ret } @@ -196,11 +204,6 @@ run_all() { return $ret } -if [ ! -f ${BPF_FILE} ]; then - echo "Missing ${BPF_FILE}. 
Run 'make' first" - exit -1 -fi - if [[ $# -eq 0 ]]; then run_all elif [[ $1 == "__subprocess" ]]; then diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh index c51ea90a1395..54fa4821bc5e 100755 --- a/tools/testing/selftests/net/udpgro_bench.sh +++ b/tools/testing/selftests/net/udpgro_bench.sh @@ -3,11 +3,11 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh index 17404f49cdb6..9a2cfec1153e 100755 --- a/tools/testing/selftests/net/udpgro_frglist.sh +++ b/tools/testing/selftests/net/udpgro_frglist.sh @@ -3,11 +3,11 @@ # # Run a series of udpgro benchmarks -source net_helper.sh +source lib.sh readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)" -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" cleanup() { local -r jobs="$(jobs -p)" diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh index 550d8eb3e224..a39fdc4aa2ff 100755 --- a/tools/testing/selftests/net/udpgro_fwd.sh +++ b/tools/testing/selftests/net/udpgro_fwd.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0 -source net_helper.sh +source lib.sh -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly BASE="ns-$(mktemp -u XXXXXX)" readonly SRC=2 readonly DST=1 diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 3e74cfa1a2bf..36ff28af4b19 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -67,6 +67,7 @@ struct testcase { int gso_len; /* mss after applying gso */ int r_num_mss; /* recv(): number of calls of full mss */ int r_len_last; /* recv(): size of last non-mss dgram, if any */ + bool v6_ext_hdr; /* send() dgrams with 
IPv6 extension headers */ }; const struct in6_addr addr6 = { @@ -77,6 +78,8 @@ const struct in_addr addr4 = { __constant_htonl(0x0a000001), /* 10.0.0.1 */ }; +static const char ipv6_hopopts_pad1[8] = { 0 }; + struct testcase testcases_v4[] = { { /* no GSO: send a single byte */ @@ -100,6 +103,19 @@ struct testcase testcases_v4[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V4, + .gso_len = CONST_MSS_V4 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V4, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V4 + 1, + .gso_len = CONST_MSS_V4 + 2, + .tfail = true, + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V4 + 1, .gso_len = CONST_MSS_V4, @@ -203,6 +219,19 @@ struct testcase testcases_v6[] = { .r_num_mss = 1, }, { + /* datalen <= MSS < gso_len: will fall back to no GSO */ + .tlen = CONST_MSS_V6, + .gso_len = CONST_MSS_V6 + 1, + .r_num_mss = 0, + .r_len_last = CONST_MSS_V6, + }, + { + /* MSS < datalen < gso_len: fail */ + .tlen = CONST_MSS_V6 + 1, + .gso_len = CONST_MSS_V6 + 2, + .tfail = true + }, + { /* send a single MSS + 1B */ .tlen = CONST_MSS_V6 + 1, .gso_len = CONST_MSS_V6, @@ -256,6 +285,13 @@ struct testcase testcases_v6[] = { .r_num_mss = 2, }, { + /* send 2 1B segments with extension headers */ + .tlen = 2, + .gso_len = 1, + .r_num_mss = 2, + .v6_ext_hdr = true, + }, + { /* send 2B + 2B + 1B segments */ .tlen = 5, .gso_len = 2, @@ -396,11 +432,18 @@ static void run_one(struct testcase *test, int fdt, int fdr, int i, ret, val, mss; bool sent; - fprintf(stderr, "ipv%d tx:%d gso:%d %s\n", + fprintf(stderr, "ipv%d tx:%d gso:%d %s%s\n", addr->sa_family == AF_INET ? 4 : 6, test->tlen, test->gso_len, + test->v6_ext_hdr ? "ext-hdr " : "", test->tfail ? 
"(fail)" : ""); + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, + ipv6_hopopts_pad1, sizeof(ipv6_hopopts_pad1))) + error(1, errno, "setsockopt ipv6 hopopts"); + } + val = test->gso_len; if (cfg_do_setsockopt) { if (setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &val, sizeof(val))) @@ -412,6 +455,12 @@ static void run_one(struct testcase *test, int fdt, int fdr, error(1, 0, "send succeeded while expecting failure"); if (!sent && !test->tfail) error(1, 0, "send failed while expecting success"); + + if (test->v6_ext_hdr) { + if (setsockopt(fdt, IPPROTO_IPV6, IPV6_HOPOPTS, NULL, 0)) + error(1, errno, "setsockopt ipv6 hopopts clear"); + } + if (!sent) return; diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh index 640bc43452fa..88fa1d53ba2b 100755 --- a/tools/testing/selftests/net/udpgso_bench.sh +++ b/tools/testing/selftests/net/udpgso_bench.sh @@ -92,6 +92,9 @@ run_udp() { echo "udp" run_in_netns ${args} + echo "udp sendmmsg" + run_in_netns ${args} -m + echo "udp gso" run_in_netns ${args} -S 0 diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c index 477392715a9a..86d80cce55b4 100644 --- a/tools/testing/selftests/net/udpgso_bench_tx.c +++ b/tools/testing/selftests/net/udpgso_bench_tx.c @@ -25,7 +25,7 @@ #include <sys/types.h> #include <unistd.h> -#include "../kselftest.h" +#include "kselftest.h" #ifndef ETH_MAX_MTU #define ETH_MAX_MTU 0xFFFFU diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh index f52aa5f7da52..3e751234ccfe 100755 --- a/tools/testing/selftests/net/unicast_extensions.sh +++ b/tools/testing/selftests/net/unicast_extensions.sh @@ -30,14 +30,7 @@ source lib.sh -# nettest can be run from PATH or from same directory as this selftest -if ! which nettest >/dev/null; then - PATH=$PWD:$PATH - if ! 
which nettest >/dev/null; then - echo "'nettest' command not found; skipping tests" - exit $ksft_skip - fi -fi +check_gen_prog "nettest" result=0 diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh index 4f1edbafb946..9709dd067c72 100755 --- a/tools/testing/selftests/net/veth.sh +++ b/tools/testing/selftests/net/veth.sh @@ -1,7 +1,7 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0 -BPF_FILE="xdp_dummy.bpf.o" +BPF_FILE="lib/xdp_dummy.bpf.o" readonly STATS="$(mktemp -p /tmp ns-XXXXXX)" readonly BASE=`basename $STATS` readonly SRC=2 @@ -46,8 +46,6 @@ create_ns() { ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24 ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad done - echo "#kernel" > $BASE - chmod go-rw $BASE } __chk_flag() { diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh new file mode 100755 index 000000000000..e8c02c64e03a --- /dev/null +++ b/tools/testing/selftests/net/vlan_bridge_binding.sh @@ -0,0 +1,258 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source lib.sh + +ALL_TESTS=" + test_binding_on + test_binding_off + test_binding_toggle_on + test_binding_toggle_off + test_binding_toggle_on_when_upper_down + test_binding_toggle_off_when_upper_down + test_binding_toggle_on_when_lower_down + test_binding_toggle_off_when_lower_down +" + +setup_prepare() +{ + local port + + adf_ip_link_add br up type bridge vlan_filtering 1 + + for port in d1 d2 d3; do + adf_ip_link_add $port type veth peer name r$port + adf_ip_link_set_up $port + adf_ip_link_set_up r$port + adf_ip_link_set_master $port br + done + + adf_bridge_vlan_add vid 11 dev br self + adf_bridge_vlan_add vid 11 dev d1 master + + adf_bridge_vlan_add vid 12 dev br self + adf_bridge_vlan_add vid 12 dev d2 master + + adf_bridge_vlan_add vid 13 dev br self + adf_bridge_vlan_add vid 13 dev d1 master + adf_bridge_vlan_add vid 13 dev d2 master + + adf_bridge_vlan_add vid 14 dev br self + 
adf_bridge_vlan_add vid 14 dev d1 master + adf_bridge_vlan_add vid 14 dev d2 master + adf_bridge_vlan_add vid 14 dev d3 master +} + +operstate_is() +{ + local dev=$1; shift + local expect=$1; shift + + local operstate=$(ip -j link show $dev | jq -r .[].operstate) + if [[ $operstate == UP ]]; then + operstate=1 + elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then + operstate=0 + fi + echo -n $operstate + [[ $operstate == $expect ]] +} + +check_operstate() +{ + local dev=$1; shift + local expect=$1; shift + local operstate + + operstate=$(busywait 1000 \ + operstate_is "$dev" "$expect") + check_err $? "Got operstate of $operstate, expected $expect" +} + +add_one_vlan() +{ + local link=$1; shift + local id=$1; shift + + adf_ip_link_add $link.$id link $link type vlan id $id "$@" +} + +add_vlans() +{ + add_one_vlan br 11 "$@" + add_one_vlan br 12 "$@" + add_one_vlan br 13 "$@" + add_one_vlan br 14 "$@" +} + +set_vlans() +{ + ip link set dev br.11 "$@" + ip link set dev br.12 "$@" + ip link set dev br.13 "$@" + ip link set dev br.14 "$@" +} + +down_netdevs() +{ + local dev + + for dev in "$@"; do + adf_ip_link_set_down $dev + done +} + +check_operstates() +{ + local opst_11=$1; shift + local opst_12=$1; shift + local opst_13=$1; shift + local opst_14=$1; shift + + check_operstate br.11 $opst_11 + check_operstate br.12 $opst_12 + check_operstate br.13 $opst_13 + check_operstate br.14 $opst_14 +} + +do_test_binding() +{ + local inject=$1; shift + local what=$1; shift + local opsts_d1=$1; shift + local opsts_d2=$1; shift + local opsts_d12=$1; shift + local opsts_d123=$1; shift + + RET=0 + + defer_scope_push + down_netdevs d1 + $inject + check_operstates $opsts_d1 + defer_scope_pop + + defer_scope_push + down_netdevs d2 + $inject + check_operstates $opsts_d2 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 + $inject + check_operstates $opsts_d12 + defer_scope_pop + + defer_scope_push + down_netdevs d1 d2 d3 + $inject + check_operstates $opsts_d123 
+ defer_scope_pop + + log_test "Test bridge_binding $what" +} + +do_test_binding_on() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "0 1 1 1" \ + "1 0 1 1" \ + "0 0 0 1" \ + "0 0 0 0" +} + +do_test_binding_off() +{ + local inject=$1; shift + local what=$1; shift + + do_test_binding "$inject" "$what" \ + "1 1 1 1" \ + "1 1 1 1" \ + "1 1 1 1" \ + "0 0 0 0" +} + +test_binding_on() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_on : "on" +} + +test_binding_off() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_off : "off" +} + +test_binding_toggle_on() +{ + add_vlans bridge_binding off + set_vlans up + set_vlans type vlan bridge_binding on + do_test_binding_on : "off->on" +} + +test_binding_toggle_off() +{ + add_vlans bridge_binding on + set_vlans up + set_vlans type vlan bridge_binding off + do_test_binding_off : "on->off" +} + +adf_set_binding_on() +{ + set_vlans type vlan bridge_binding on + defer set_vlans type vlan bridge_binding off +} + +adf_set_binding_off() +{ + set_vlans type vlan bridge_binding off + defer set_vlans type vlan bridge_binding on +} + +test_binding_toggle_on_when_lower_down() +{ + add_vlans bridge_binding off + set_vlans up + do_test_binding_on adf_set_binding_on "off->on when lower down" +} + +test_binding_toggle_off_when_lower_down() +{ + add_vlans bridge_binding on + set_vlans up + do_test_binding_off adf_set_binding_off "on->off when lower down" +} + +test_binding_toggle_on_when_upper_down() +{ + add_vlans bridge_binding off + set_vlans type vlan bridge_binding on + set_vlans up + do_test_binding_on : "off->on when upper down" +} + +test_binding_toggle_off_when_upper_down() +{ + add_vlans bridge_binding on + set_vlans type vlan bridge_binding off + set_vlans up + do_test_binding_off : "on->off when upper down" +} + +require_command jq + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit $EXIT_STATUS diff --git 
a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh index 7bc804ffaf7c..e195d5cab6f7 100755 --- a/tools/testing/selftests/net/vlan_hw_filter.sh +++ b/tools/testing/selftests/net/vlan_hw_filter.sh @@ -3,27 +3,101 @@ readonly NETNS="ns-$(mktemp -u XXXXXX)" +ALL_TESTS=" + test_vlan_filter_check + test_vlan0_del_crash_01 + test_vlan0_del_crash_02 + test_vlan0_del_crash_03 + test_vid0_memleak +" + ret=0 +setup() { + ip netns add ${NETNS} +} + cleanup() { - ip netns del $NETNS + ip netns del $NETNS 2>/dev/null } trap cleanup EXIT fail() { - echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 - ret=1 + echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2 + ret=1 +} + +tests_run() +{ + local current_test + for current_test in ${TESTS:-$ALL_TESTS}; do + $current_test + done +} + +test_vlan_filter_check() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 + ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 + ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 + ip netns exec ${NETNS} ip link set bond_slave_1 nomaster + ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function" + cleanup } -ip netns add ${NETNS} -ip netns exec ${NETNS} ip link add bond0 type bond mode 0 -ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2 -ip netns exec ${NETNS} ip link set bond_slave_1 master bond0 -ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off -ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0 -ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0 -ip netns exec ${NETNS} ip link set bond_slave_1 nomaster -ip netns exec ${NETNS} ip link del veth2 || fail 
"Please check vlan HW filter function" +#enable vlan_filter feature of real_dev with vlan0 during running time +test_vlan0_del_crash_01() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature and add vlan0 for real_dev during running time +test_vlan0_del_crash_02() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +#enable vlan_filter feature of real_dev during running time +#test kernel_bug of vlan unregister +test_vlan0_del_crash_03() { + setup + ip netns exec ${NETNS} ip link add bond0 type bond mode 0 + ip netns exec ${NETNS} ip link add link bond0 name vlan0 type vlan id 0 protocol 802.1q + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link set dev bond0 up + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter on + ip netns exec ${NETNS} ip link set dev bond0 down + ip netns exec ${NETNS} ip link del vlan0 || fail "Please check vlan HW filter function" + cleanup +} + +test_vid0_memleak() { + setup + ip netns exec ${NETNS} ip link add 
bond0 up type bond mode 0 + ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off + ip netns exec ${NETNS} ip link del dev bond0 || fail "Please check vlan HW filter function" + cleanup +} +tests_run exit $ret diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh index 2da32f4c479b..ce34cb2e6e0b 100755 --- a/tools/testing/selftests/net/vrf_route_leaking.sh +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -275,7 +275,7 @@ setup_sym() # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } setup_asym() @@ -370,7 +370,7 @@ setup_asym() ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad # Wait for ip config to settle - sleep 2 + slowwait 5 ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 } check_connectivity() @@ -533,6 +533,86 @@ ipv6_ping_frag_asym() ipv6_ping_frag asym } +ipv4_ping_local() +{ + log_section "IPv4 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity || return + + run_cmd ip netns exec $r1 ip vrf exec blue ping -c1 -w1 ${H2_N2_IP} + log_test $? 0 "VRF ICMP local IPv4" +} + +ipv4_tcp_local() +{ + log_section "IPv4 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 0 "VRF tcp local connection IPv4" +} + +ipv4_udp_local() +{ + log_section "IPv4 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity || return + + run_cmd nettest -s -D -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -D -N "$r1" -d blue -r ${H2_N2_IP} + log_test $? 
0 "VRF udp local connection IPv4" +} + +ipv6_ping_local() +{ + log_section "IPv6 (sym route): VRF ICMP local error route lookup ping" + + setup_sym + + check_connectivity6 || return + + run_cmd ip netns exec $r1 ip vrf exec blue ${ping6} -c1 -w1 ${H2_N2_IP6} + log_test $? 0 "VRF ICMP local IPv6" +} + +ipv6_tcp_local() +{ + log_section "IPv6 (sym route): VRF tcp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF tcp local connection IPv6" +} + +ipv6_udp_local() +{ + log_section "IPv6 (sym route): VRF udp local connection" + + setup_sym + + check_connectivity6 || return + + run_cmd nettest -s -6 -D -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 & + sleep 1 + run_cmd nettest -6 -D -N "$r1" -d blue -r ${H2_N2_IP6} + log_test $? 0 "VRF udp local connection IPv6" +} + ################################################################################ # usage @@ -555,8 +635,12 @@ EOF # Some systems don't have a ping6 binary anymore command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) -TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" -TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym" +check_gen_prog "nettest" + +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local +ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local +ipv6_ping_ttl_asym ipv6_traceroute_asym" ret=0 nsuccess=0 @@ -594,12 +678,18 @@ do ipv4_traceroute|traceroute) ipv4_traceroute;;& ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& ipv4_ping_frag|ping) ipv4_ping_frag;;& + ipv4_ping_local|ping) ipv4_ping_local;;& + ipv4_tcp_local) ipv4_tcp_local;;& + ipv4_udp_local) ipv4_udp_local;;& ipv6_ping_ttl|ping) 
ipv6_ping_ttl;;& ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& ipv6_traceroute|traceroute) ipv6_traceroute;;& ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& ipv6_ping_frag|ping) ipv6_ping_frag;;& + ipv6_ping_local|ping) ipv6_ping_local;;& + ipv6_tcp_local) ipv6_tcp_local;;& + ipv6_udp_local) ipv6_udp_local;;& # setup namespaces and config, but do not run any tests setup_sym|setup) setup_sym; exit 0;; diff --git a/tools/testing/selftests/net/xfrm_policy_add_speed.sh b/tools/testing/selftests/net/xfrm_policy_add_speed.sh new file mode 100755 index 000000000000..2fab29d3cb91 --- /dev/null +++ b/tools/testing/selftests/net/xfrm_policy_add_speed.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +source lib.sh + +timeout=4m +ret=0 +tmp=$(mktemp) +cleanup() { + cleanup_all_ns + rm -f "$tmp" +} + +trap cleanup EXIT + +maxpolicies=100000 +[ "$KSFT_MACHINE_SLOW" = "yes" ] && maxpolicies=10000 + +do_dummies4() { + local dir="$1" + local max="$2" + + local policies + local pfx + pfx=30 + policies=0 + + ip netns exec "$ns" ip xfrm policy flush + + for i in $(seq 1 100);do + local s + local d + for j in $(seq 1 255);do + s=$((i+0)) + d=$((i+100)) + + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.0/30 dst 10.$d.$j.$a/$pfx dir $dir action block + done + for a in $(seq 1 8 255); do + policies=$((policies+1)) + [ "$policies" -gt "$max" ] && return + echo xfrm policy add src 10.$s.$j.$a/30 dst 10.$d.$j.0/$pfx dir $dir action block + done + done + done +} + +setup_ns ns + +do_bench() +{ + local max="$1" + + start=$(date +%s%3N) + do_dummies4 "out" "$max" > "$tmp" + if ! 
timeout "$timeout" ip netns exec "$ns" ip -batch "$tmp";then + echo "WARNING: policy insertion cancelled after $timeout" + ret=1 + fi + stop=$(date +%s%3N) + + result=$((stop-start)) + + policies=$(wc -l < "$tmp") + printf "Inserted %-06s policies in $result ms\n" $policies + + have=$(ip netns exec "$ns" ip xfrm policy show | grep "action block" | wc -l) + if [ "$have" -ne "$policies" ]; then + echo "WARNING: mismatch, have $have policies, expected $policies" + ret=1 + fi +} + +p=100 +while [ $p -le "$maxpolicies" ]; do + do_bench "$p" + p="${p}0" +done + +exit $ret diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk index 59cb26cf3f73..793a2fc33d9f 100644 --- a/tools/testing/selftests/net/ynl.mk +++ b/tools/testing/selftests/net/ynl.mk @@ -5,10 +5,13 @@ # Inputs: # # YNL_GENS: families we need in the selftests -# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet) +# YNL_GEN_PROGS: TEST_GEN_PROGS which need YNL # YNL_GEN_FILES: TEST_GEN_FILES which need YNL -YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) +YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES)) \ + $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_PROGS)) +YNL_SPECS := \ + $(patsubst %,$(top_srcdir)/Documentation/netlink/specs/%.yaml,$(YNL_GENS)) $(YNL_OUTPUTS): $(OUTPUT)/libynl.a $(YNL_OUTPUTS): CFLAGS += \ @@ -16,6 +19,22 @@ $(YNL_OUTPUTS): CFLAGS += \ -I$(top_srcdir)/tools/net/ynl/lib/ \ -I$(top_srcdir)/tools/net/ynl/generated/ -$(OUTPUT)/libynl.a: - $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a +# Make sure we rebuild libynl if user added a new family. We can't easily +# depend on the contents of a variable so create a fake file with a hash. 
+YNL_GENS_HASH := $(shell echo $(YNL_GENS) | sha1sum | cut -c1-8) +$(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig: + $(Q)rm -f $(OUTPUT)/.libynl-*.sig + $(Q)touch $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + +$(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig + $(Q)rm -f $(top_srcdir)/tools/net/ynl/libynl.a + $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl \ + GENS="$(YNL_GENS)" RSTS="" libynl.a $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a + +EXTRA_CLEAN += \ + $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \ + $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \ + $(top_srcdir)/tools/net/ynl/lib/*.[ado] \ + $(OUTPUT)/.libynl-*.sig \ + $(OUTPUT)/libynl.a |
