summary | refs | log | tree | commit | diff
path: root/tools/testing/selftests/net
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r-- tools/testing/selftests/net/.gitignore | 1
-rw-r--r-- tools/testing/selftests/net/Makefile | 2
-rw-r--r-- tools/testing/selftests/net/forwarding/devlink_lib.sh | 29
-rw-r--r-- tools/testing/selftests/net/forwarding/lib.sh | 88
-rwxr-xr-x tools/testing/selftests/net/forwarding/sch_ets.sh | 9
-rw-r--r-- tools/testing/selftests/net/forwarding/sch_ets_tests.sh | 10
-rwxr-xr-x tools/testing/selftests/net/forwarding/skbedit_priority.sh | 163
-rw-r--r-- tools/testing/selftests/net/forwarding/tc_common.sh | 32
-rw-r--r-- tools/testing/selftests/net/mptcp/mptcp_connect.c | 45
-rwxr-xr-x tools/testing/selftests/net/mptcp/mptcp_connect.sh | 24
-rw-r--r-- tools/testing/selftests/net/reuseaddr_ports_exhausted.c | 162
-rwxr-xr-x tools/testing/selftests/net/reuseaddr_ports_exhausted.sh | 35
12 files changed, 540 insertions, 60 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index ecc52d4c034d..91f9aea853b1 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -23,3 +23,4 @@ so_txtime
tcp_fastopen_backup_key
nettest
fin_ack_lat
+reuseaddr_ports_exhausted
\ No newline at end of file
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 4c1bd03ffa1c..24d8424010eb 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -14,6 +14,7 @@ TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
+TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -24,6 +25,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_FILES += reuseaddr_ports_exhausted
KSFT_KHDR_INSTALL := 1
include ../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 40b076983239..0df6d8942721 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then
exit 1
fi
+devlink dev help 2>&1 | grep info &> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing devlink dev info support"
+ exit 1
+fi
+
##############################################################################
# Devlink helpers
@@ -373,6 +379,7 @@ devlink_trap_drop_test()
local trap_name=$1; shift
local group_name=$1; shift
local dev=$1; shift
+ local handle=$1; shift
# This is the common part of all the tests. It checks that stats are
# initially idle, then non-idle after changing the trap action and
@@ -397,7 +404,7 @@ devlink_trap_drop_test()
devlink_trap_group_stats_idle_test $group_name
check_err $? "Trap group stats not idle after setting action to drop"
- tc_check_packets "dev $dev egress" 101 0
+ tc_check_packets "dev $dev egress" $handle 0
check_err $? "Packets were not dropped"
}
@@ -406,7 +413,25 @@ devlink_trap_drop_cleanup()
local mz_pid=$1; shift
local dev=$1; shift
local proto=$1; shift
+ local pref=$1; shift
+ local handle=$1; shift
kill $mz_pid && wait $mz_pid &> /dev/null
- tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower
+ tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
+}
+
+devlink_port_by_netdev()
+{
+ local if_name=$1
+
+ devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]'
+}
+
+devlink_cpu_port_get()
+{
+ local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" |
+ grep cpu | cut -d/ -f3 | cut -d: -f1 |
+ sed -n '1p')
+
+ echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 2f5da414aaa7..977fc2b326a2 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -60,6 +60,15 @@ check_tc_chain_support()
fi
}
+check_tc_action_hw_stats_support()
+{
+ tc actions help 2>&1 | grep -q hw_stats
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
+ exit 1
+ fi
+}
+
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
exit 0
@@ -248,13 +257,40 @@ busywait()
done
}
+not()
+{
+ "$@"
+ [[ $? != 0 ]]
+}
+
+grep_bridge_fdb()
+{
+ local addr=$1; shift
+ local word
+ local flag
+
+ if [ "$1" == "self" ] || [ "$1" == "master" ]; then
+ word=$1; shift
+ if [ "$1" == "-v" ]; then
+ flag=$1; shift
+ fi
+ fi
+
+ $@ | grep $addr | grep $flag "$word"
+}
+
+wait_for_offload()
+{
+ "$@" | grep -q offload
+}
+
until_counter_is()
{
- local value=$1; shift
+ local expr=$1; shift
local current=$("$@")
echo $((current))
- ((current >= value))
+ ((current $expr))
}
busywait_for_counter()
@@ -263,7 +299,7 @@ busywait_for_counter()
local delta=$1; shift
local base=$("$@")
- busywait "$timeout" until_counter_is $((base + delta)) "$@"
+ busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
setup_wait_dev()
@@ -599,6 +635,17 @@ tc_rule_stats_get()
| jq ".[1].options.actions[].stats$selector"
}
+tc_rule_handle_stats_get()
+{
+ local id=$1; shift
+ local handle=$1; shift
+ local selector=${1:-.packets}; shift
+
+ tc -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats$selector"
+}
+
ethtool_stats_get()
{
local dev=$1; shift
@@ -607,6 +654,26 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
+qdisc_stats_get()
+{
+ local dev=$1; shift
+ local handle=$1; shift
+ local selector=$1; shift
+
+ tc -j -s qdisc show dev "$dev" \
+ | jq '.[] | select(.handle == "'"$handle"'") | '"$selector"
+}
+
+qdisc_parent_stats_get()
+{
+ local dev=$1; shift
+ local parent=$1; shift
+ local selector=$1; shift
+
+ tc -j -s qdisc show dev "$dev" invisible \
+ | jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
+}
+
humanize()
{
local speed=$1; shift
@@ -1132,18 +1199,29 @@ flood_test()
flood_multicast_test $br_port $host1_if $host2_if
}
-start_traffic()
+__start_traffic()
{
+ local proto=$1; shift
local h_in=$1; shift # Where the traffic egresses the host
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
- -a own -b $dmac -t udp -q &
+ -a own -b $dmac -t "$proto" -q "$@" &
sleep 1
}
+start_traffic()
+{
+ __start_traffic udp "$@"
+}
+
+start_tcp_traffic()
+{
+ __start_traffic tcp "$@"
+}
+
stop_traffic()
{
# Suppress noise from killing mausezahn.
diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh
index 40e0ad1bc4f2..e60c8b4818cc 100755
--- a/tools/testing/selftests/net/forwarding/sch_ets.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets.sh
@@ -34,11 +34,14 @@ switch_destroy()
}
# Callback from sch_ets_tests.sh
-get_stats()
+collect_stats()
{
- local stream=$1; shift
+ # Arguments: the stream numbers to report on. For each one, in argument
+ # order, print the .bytes value of the qdisc on $swp2 whose parent is
+ # class 10:(stream + 1).
+ local -a streams=("$@")
+ local stream
- link_stats_get $h2.1$stream rx bytes
+ for stream in "${streams[@]}"; do
+ qdisc_parent_stats_get "$swp2" "10:$((stream + 1))" .bytes
+ done
}
ets_run
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index 3c3b204d47e8..cdf689e99458 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -2,7 +2,7 @@
# Global interface:
# $put -- port under test (e.g. $swp2)
-# get_stats($band) -- A function to collect stats for band
+# collect_stats($streams...) -- A function to get stats for individual streams
# ets_start_traffic($band) -- Start traffic for this band
# ets_change_qdisc($op, $dev, $nstrict, $quanta...) -- Add or change qdisc
@@ -94,15 +94,11 @@ __ets_dwrr_test()
sleep 10
- t0=($(for stream in ${streams[@]}; do
- get_stats $stream
- done))
+ t0=($(collect_stats "${streams[@]}"))
sleep 10
- t1=($(for stream in ${streams[@]}; do
- get_stats $stream
- done))
+ t1=($(collect_stats "${streams[@]}"))
d=($(for ((i = 0; i < ${#streams[@]}; i++)); do
echo $((${t1[$i]} - ${t0[$i]}))
done))
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
new file mode 100755
index 000000000000..0e7693297765
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by an action skbedit priority. The
+# new priority should be taken into account when classifying traffic on the PRIO
+# qdisc at $swp2. The test verifies that for different priority values, the
+# traffic ends up in expected PRIO band.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | | PRIO | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_ingress
+ test_egress
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+ tc qdisc add dev $swp2 root handle 10: \
+ prio bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 root
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+# Check that one skbedit priority value steers traffic to the expected class.
+# Arguments:
+# $1 - where to attach the filter, e.g. "dev $swp1 ingress"
+# $2 - priority that the skbedit action sets
+# $3 - classid on $swp2 whose packet counter is expected to grow
+# Installs a flower filter with "action skbedit priority", sends 10 UDP
+# packets from $h1 with $MZ, then waits up to HIT_TIMEOUT ms for the .packets
+# counter of the qdisc under $classid to rise by at least 10. The result is
+# reported through check_err / log_test and the filter is removed afterwards.
+test_skbedit_priority_one()
+{
+ local locus=$1; shift
+ local prio=$1; shift
+ local classid=$1; shift
+
+ RET=0
+
+ # $locus is left unquoted on purpose: it carries several tc words.
+ tc filter add $locus handle 101 pref 1 \
+ flower action skbedit priority $prio
+
+ local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets)
+ $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q
+ # Declared separately so that $? below reflects busywait, not "local".
+ local pkt1
+ pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \
+ qdisc_parent_stats_get $swp2 $classid .packets)
+
+ check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))."
+ log_test "$locus skbedit priority $prio -> classid $classid"
+
+ tc filter del $locus pref 1
+}
+
+test_ingress()
+{
+ local prio
+
+ for prio in {0..7}; do
+ test_skbedit_priority_one "dev $swp1 ingress" \
+ $prio 10:$((8 - prio))
+ done
+}
+
+test_egress()
+{
+ local prio
+
+ for prio in {0..7}; do
+ test_skbedit_priority_one "dev $swp2 egress" \
+ $prio 10:$((8 - prio))
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 64f652633585..0e18e8be6e2a 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -6,39 +6,14 @@ CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
-__tc_check_packets()
-{
- local id=$1
- local handle=$2
- local count=$3
- local operator=$4
-
- start_time="$(date -u +%s%3N)"
- while true
- do
- cmd_jq "tc -j -s filter show $id" \
- ".[] | select(.options.handle == $handle) | \
- select(.options.actions[0].stats.packets $operator $count)" \
- &> /dev/null
- ret=$?
- if [[ $ret -eq 0 ]]; then
- return $ret
- fi
- current_time="$(date -u +%s%3N)"
- diff=$(expr $current_time - $start_time)
- if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then
- return 1
- fi
- done
-}
-
tc_check_packets()
{
local id=$1
local handle=$2
local count=$3
- __tc_check_packets "$id" "$handle" "$count" "=="
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
tc_check_packets_hitting()
@@ -46,5 +21,6 @@ tc_check_packets_hitting()
local id=$1
local handle=$2
- __tc_check_packets "$id" "$handle" 0 ">"
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 99579c0223c1..702bab2c12da 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -34,8 +34,8 @@ extern int optind;
#define TCP_ULP 31
#endif
+static int poll_timeout = 10 * 1000;
static bool listen_mode;
-static int poll_timeout;
enum cfg_mode {
CFG_MODE_POLL,
@@ -50,11 +50,20 @@ static int cfg_sock_proto = IPPROTO_MPTCP;
static bool tcpulp_audit;
static int pf = AF_INET;
static int cfg_sndbuf;
+static int cfg_rcvbuf;
static void die_usage(void)
{
- fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]"
- "[ -l ] [ -t timeout ] connect_address\n");
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
+ "[-l] connect_address\n");
+ fprintf(stderr, "\t-6 use ipv6\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-p num -- use port num\n");
+ fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
+ fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+ fprintf(stderr, "\t-u -- check mptcp ulp\n");
exit(1);
}
@@ -97,6 +106,17 @@ static void xgetaddrinfo(const char *node, const char *service,
}
}
+static void set_rcvbuf(int fd, unsigned int size)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size));
+ if (err) {
+ perror("set SO_RCVBUF");
+ exit(1);
+ }
+}
+
static void set_sndbuf(int fd, unsigned int size)
{
int err;
@@ -704,6 +724,8 @@ int main_loop(void)
check_getpeername_connect(fd);
+ if (cfg_rcvbuf)
+ set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
@@ -745,7 +767,7 @@ int parse_mode(const char *mode)
return 0;
}
-int parse_sndbuf(const char *size)
+static int parse_int(const char *size)
{
unsigned long s;
@@ -765,16 +787,14 @@ int parse_sndbuf(const char *size)
die_usage();
}
- cfg_sndbuf = s;
-
- return 0;
+ return (int)s;
}
static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) {
+ while ((c = getopt(argc, argv, "6lp:s:hut:m:S:R:")) != -1) {
switch (c) {
case 'l':
listen_mode = true;
@@ -802,8 +822,11 @@ static void parse_opts(int argc, char **argv)
case 'm':
cfg_mode = parse_mode(optarg);
break;
- case 'b':
- cfg_sndbuf = parse_sndbuf(optarg);
+ case 'S':
+ cfg_sndbuf = parse_int(optarg);
+ break;
+ case 'R':
+ cfg_rcvbuf = parse_int(optarg);
break;
}
}
@@ -831,6 +854,8 @@ int main(int argc, char *argv[])
if (fd < 0)
return 1;
+ if (cfg_rcvbuf)
+ set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index d573a0feb98d..acf02e156d20 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="b:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:"
ret=0
sin=""
sout=""
@@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101))
tc_reorder=""
testmode=""
sndbuf=0
+rcvbuf=0
options_log=true
if [ $tc_loss -eq 100 ];then
@@ -39,7 +40,8 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
- echo -e "\t-b: set sndbuf value (default: use kernel default)"
+ echo -e "\t-S: set sndbuf value (default: use kernel default)"
+ echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
}
@@ -73,11 +75,19 @@ while getopts "$optstring" option;do
"c")
capture=true
;;
- "b")
+ "S")
if [ $OPTARG -ge 0 ];then
sndbuf="$OPTARG"
else
- echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2
+ echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
+ exit 1
+ fi
+ ;;
+ "R")
+ if [ $OPTARG -ge 0 ];then
+ rcvbuf="$OPTARG"
+ else
+ echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
exit 1
fi
;;
@@ -342,8 +352,12 @@ do_transfer()
port=$((10000+$TEST_COUNT))
TEST_COUNT=$((TEST_COUNT+1))
+ if [ "$rcvbuf" -gt 0 ]; then
+ extra_args="$extra_args -R $rcvbuf"
+ fi
+
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -b $sndbuf"
+ extra_args="$extra_args -S $sndbuf"
fi
if [ -n "$testmode" ]; then
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
new file mode 100644
index 000000000000..7b01b7c2ec10
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Check if we can fully utilize 4-tuples for connect().
+ *
+ * Rules to bind sockets to the same port when all ephemeral ports are
+ * exhausted.
+ *
+ * 1. if there are TCP_LISTEN sockets on the port, fail to bind.
+ * 2. if there are sockets without SO_REUSEADDR, fail to bind.
+ * 3. if SO_REUSEADDR is disabled, fail to bind.
+ * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled,
+ * succeed to bind.
+ * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and
+ * there is no socket having the both options and the same EUID,
+ * succeed to bind.
+ * 6. fail to bind.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "../kselftest_harness.h"
+
+struct reuse_opts {
+ int reuseaddr[2];
+ int reuseport[2];
+};
+
+struct reuse_opts unreusable_opts[12] = {
+ {0, 0, 0, 0},
+ {0, 0, 0, 1},
+ {0, 0, 1, 0},
+ {0, 0, 1, 1},
+ {0, 1, 0, 0},
+ {0, 1, 0, 1},
+ {0, 1, 1, 0},
+ {0, 1, 1, 1},
+ {1, 0, 0, 0},
+ {1, 0, 0, 1},
+ {1, 0, 1, 0},
+ {1, 0, 1, 1},
+};
+
+struct reuse_opts reusable_opts[4] = {
+ {1, 1, 0, 0},
+ {1, 1, 0, 1},
+ {1, 1, 1, 0},
+ {1, 1, 1, 1},
+};
+
+int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
+{
+ struct sockaddr_in local_addr;
+ int len = sizeof(local_addr);
+ int fd, ret;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_NE(-1, fd) TH_LOG("failed to open socket.");
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int));
+ ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR.");
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int));
+ ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT.");
+
+ local_addr.sin_family = AF_INET;
+ local_addr.sin_addr.s_addr = inet_addr("127.0.0.1");
+ local_addr.sin_port = 0;
+
+ if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+TEST(reuseaddr_ports_exhausted_unreusable)
+{
+ struct reuse_opts *opts;
+ int i, j, fd[2];
+
+ for (i = 0; i < 12; i++) {
+ opts = &unreusable_opts[i];
+
+ for (j = 0; j < 2; j++)
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind.");
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_same_euid)
+{
+ struct reuse_opts *opts;
+ int i, j, fd[2];
+
+ for (i = 0; i < 4; i++) {
+ opts = &reusable_opts[i];
+
+ for (j = 0; j < 2; j++)
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+
+ if (opts->reuseport[0] && opts->reuseport[1]) {
+ EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened.");
+ } else {
+ EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations.");
+ }
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST(reuseaddr_ports_exhausted_reusable_different_euid)
+{
+ struct reuse_opts *opts;
+ int i, j, ret, fd[2];
+ uid_t euid[2] = {10, 20};
+
+ for (i = 0; i < 4; i++) {
+ opts = &reusable_opts[i];
+
+ for (j = 0; j < 2; j++) {
+ ret = seteuid(euid[j]);
+ ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]);
+
+ fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]);
+
+ ret = seteuid(0);
+ ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0.");
+ }
+
+ ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind.");
+ EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid.");
+
+ if (fd[1] != -1) {
+ ret = listen(fd[0], 5);
+ ASSERT_EQ(0, ret) TH_LOG("failed to listen.");
+
+ ret = listen(fd[1], 5);
+ EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN.");
+ }
+
+ for (j = 0; j < 2; j++)
+ if (fd[j] != -1)
+ close(fd[j]);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
new file mode 100755
index 000000000000..20e3a2913d06
--- /dev/null
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run tests when all ephemeral ports are exhausted.
+#
+# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+
+set +x
+set -e
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+ ip netns exec "${NETNS}" \
+ sysctl -w net.ipv4.ip_local_port_range="32768 32768" \
+ > /dev/null 2>&1
+ ip netns exec "${NETNS}" \
+ sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+trap cleanup EXIT
+setup
+
+do_test() {
+ ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted
+}
+
+do_test
+echo "tests done"