diff options
Diffstat (limited to 'tools/testing/selftests/net/packetdrill')
71 files changed, 3219 insertions, 0 deletions
diff --git a/tools/testing/selftests/net/packetdrill/Makefile b/tools/testing/selftests/net/packetdrill/Makefile new file mode 100644 index 000000000000..31cfb666ba8b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +TEST_INCLUDES := ksft_runner.sh \ + defaults.sh \ + set_sysctls.py \ + ../../kselftest/ktap_helpers.sh + +TEST_PROGS := $(wildcard *.pkt) + +include ../../lib.mk diff --git a/tools/testing/selftests/net/packetdrill/config b/tools/testing/selftests/net/packetdrill/config new file mode 100644 index 000000000000..0237ed98f3c0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/config @@ -0,0 +1,11 @@ +CONFIG_IPV6=y +CONFIG_HZ_1000=y +CONFIG_HZ=1000 +CONFIG_NET_NS=y +CONFIG_NET_SCH_FIFO=y +CONFIG_NET_SCH_FQ=y +CONFIG_PROC_SYSCTL=y +CONFIG_SYN_COOKIES=y +CONFIG_TCP_CONG_CUBIC=y +CONFIG_TCP_MD5SIG=y +CONFIG_TUN=y diff --git a/tools/testing/selftests/net/packetdrill/defaults.sh b/tools/testing/selftests/net/packetdrill/defaults.sh new file mode 100755 index 000000000000..1095a7b22f44 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/defaults.sh @@ -0,0 +1,63 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Set standard production config values that relate to TCP behavior. + +# Flush old cached data (fastopen cookies). +ip tcp_metrics flush all > /dev/null 2>&1 + +# TCP min, default, and max receive and send buffer sizes. +sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))" +sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304" + +# TCP timestamps. +sysctl -q net.ipv4.tcp_timestamps=1 + +# TCP SYN(ACK) retry thresholds +sysctl -q net.ipv4.tcp_syn_retries=5 +sysctl -q net.ipv4.tcp_synack_retries=5 + +# TCP Forward RTO-Recovery, RFC 5682. 
+sysctl -q net.ipv4.tcp_frto=2 + +# TCP Selective Acknowledgements (SACK) +sysctl -q net.ipv4.tcp_sack=1 + +# TCP Duplicate Selective Acknowledgements (DSACK) +sysctl -q net.ipv4.tcp_dsack=1 + +# TCP FACK (Forward Acknowledgement) +sysctl -q net.ipv4.tcp_fack=0 + +# TCP reordering degree ("dupthresh" threshold for entering Fast Recovery). +sysctl -q net.ipv4.tcp_reordering=3 + +# TCP congestion control. +sysctl -q net.ipv4.tcp_congestion_control=cubic + +# TCP slow start after idle. +sysctl -q net.ipv4.tcp_slow_start_after_idle=0 + +# TCP RACK and TLP. +sysctl -q net.ipv4.tcp_early_retrans=4 net.ipv4.tcp_recovery=1 + +# TCP method for deciding when to defer sending to accumulate big TSO packets. +sysctl -q net.ipv4.tcp_tso_win_divisor=3 + +# TCP Explicit Congestion Notification (ECN) +sysctl -q net.ipv4.tcp_ecn=0 + +sysctl -q net.ipv4.tcp_pacing_ss_ratio=200 +sysctl -q net.ipv4.tcp_pacing_ca_ratio=120 +sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1 + +sysctl -q net.ipv4.tcp_fastopen=0x70403 +sysctl -q net.ipv4.tcp_fastopen_key=a1a1a1a1-b2b2b2b2-c3c3c3c3-d4d4d4d4 + +sysctl -q net.ipv4.tcp_syncookies=1 + +# Override the default qdisc on the tun device. +# Many tests fail with timing errors if the default +# is FQ and that paces their flows. 
+tc qdisc add dev tun0 root pfifo + diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh new file mode 100755 index 000000000000..ef8b25a606d8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -0,0 +1,62 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" + +readonly ipv4_args=('--ip_version=ipv4 ' + '--local_ip=192.168.0.1 ' + '--gateway_ip=192.168.0.1 ' + '--netmask_ip=255.255.0.0 ' + '--remote_ip=192.0.2.1 ' + '-D CMSG_LEVEL_IP=SOL_IP ' + '-D CMSG_TYPE_RECVERR=IP_RECVERR ') + +readonly ipv6_args=('--ip_version=ipv6 ' + '--mtu=1520 ' + '--local_ip=fd3d:0a0b:17d6::1 ' + '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' + '--remote_ip=fd3d:fa7b:d17d::1 ' + '-D CMSG_LEVEL_IP=SOL_IPV6 ' + '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') + +if [ $# -ne 1 ]; then + ktap_exit_fail_msg "usage: $0 <script>" + exit "$KSFT_FAIL" +fi +script="$(basename $1)" + +if [ -z "$(which packetdrill)" ]; then + ktap_skip_all "packetdrill not found in PATH" + exit "$KSFT_SKIP" +fi + +declare -a optargs +failfunc=ktap_test_fail + +if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then + optargs+=('--tolerance_usecs=14000') + + # xfail tests that are known flaky with dbg config, not fixable. + # still run them for coverage (and expect 100% pass without dbg). 
+ declare -ar xfail_list=( + "tcp_eor_no-coalesce-retrans.pkt" + "tcp_fast_recovery_prr-ss.*.pkt" + "tcp_slow_start_slow-start-after-win-update.pkt" + "tcp_timestamping.*.pkt" + "tcp_user_timeout_user-timeout-probe.pkt" + "tcp_zerocopy_epoll_.*.pkt" + "tcp_tcp_info_tcp-info-.*-limited.pkt" + ) + readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$" + [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail +fi + +ktap_print_header +ktap_set_plan 2 + +unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv4" || $failfunc "ipv4" +unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \ + && ktap_test_pass "ipv6" || $failfunc "ipv6" + +ktap_finished diff --git a/tools/testing/selftests/net/packetdrill/set_sysctls.py b/tools/testing/selftests/net/packetdrill/set_sysctls.py new file mode 100755 index 000000000000..5ddf456ae973 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/set_sysctls.py @@ -0,0 +1,38 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Sets sysctl values and writes a file that restores them. + +The arguments are of the form "<proc-file>=<val>" separated by spaces. +The program first reads the current value of the proc-file and creates +a shell script named "/tmp/sysctl_restore_${PACKETDRILL_PID}.sh" which +restores the values when executed. It then sets the new values. + +PACKETDRILL_PID is set by packetdrill to the pid of itself, so a .pkt +file could restore sysctls by running `/tmp/sysctl_restore_${PPID}.sh` +at the end. 
+""" + +import os +import subprocess +import sys + +filename = '/tmp/sysctl_restore_%s.sh' % os.environ['PACKETDRILL_PID'] + +# Open file for restoring sysctl values +restore_file = open(filename, 'w') +print('#!/bin/bash', file=restore_file) + +for a in sys.argv[1:]: + sysctl = a.split('=') + # sysctl[0] contains the proc-file name, sysctl[1] the new value + + # read current value and add restore command to file + cur_val = subprocess.check_output(['cat', sysctl[0]], universal_newlines=True) + print('echo "%s" > %s' % (cur_val.strip(), sysctl[0]), file=restore_file) + + # set new value + cmd = 'echo "%s" > %s' % (sysctl[1], sysctl[0]) + os.system(cmd) + +os.system('chmod u+x %s' % filename) diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt new file mode 100644 index 000000000000..38535701656e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking accept. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0...0.200 accept(3, ..., ...) = 4 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + + +.1 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt new file mode 100644 index 000000000000..3692ef102381 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking connect. + +`./defaults.sh` + +// Establish a connection. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + + +.1...0.200 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt new file mode 100644 index 000000000000..914eabab367a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking read. +--tolerance_usecs=10000 + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0...0.100 read(4, ..., 2000) = 2000 + +.1 < P. 1:2001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 2001 + + +.1...0.200 read(4, ..., 2000) = 2000 + +.1 < P. 2001:4001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 + + +.1 < P. 4001:6001(2000) ack 1 win 257 + +0 > . 1:1(0) ack 6001 + +0...0.000 read(4, ..., 1000) = 1000 + +0...0.000 read(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt new file mode 100644 index 000000000000..cec5a0725d95 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test for blocking write. +--tolerance_usecs=10000 + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10 +` + +// Establish a connection. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 50000 + +0 accept(3, ..., ...) = 4 + +// Kernel doubles our value -> sk->sk_sndbuf is set to 42000 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0 + +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0 + +// A write of 60000 does not block. + +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks + + +.1 < . 1:1(0) ack 10001 win 50000 + + +.1 < . 1:1(0) ack 30001 win 50000 + +// This ACK should wakeup the write(). An ACK of 35001 does not. + +.1 < . 1:1(0) ack 36001 win 50000 + +// Reset to sysctls defaults. +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt new file mode 100644 index 000000000000..8514d6bdbb6d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test basic connection teardown where local process closes first: +// the local process calls close() first, so we send a FIN, and receive an ACK. +// Then we receive a FIN and ACK it. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.01...0.011 connect(3, ..., ...) = 0 + +0 > S 0:0(0) <...> + +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +0 write(3, ..., 1000) = 1000 + +0 > P. 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1001 win 257 + + +0 close(3) = 0 + +0 > F. 1001:1001(0) ack 1 + +0 < . 1:1(0) ack 1002 win 257 + + +0 < F. 1:1(0) ack 1002 win 257 + +0 > . 
1002:1002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt new file mode 100644 index 000000000000..04103134bd99 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test to make sure no RST is being sent when close() +// is called on a socket with SYN_SENT state. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <...> + +// Application decides to close the socket in SYN_SENT state +// Make sure no RST is sent after close(). + +0 close(3) = 0 + +// Receive syn-ack to trigger the send side packet examination: +// If a RESET were sent right after close(), it would have failed with +// a mismatched timestamp. + +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7> + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt new file mode 100644 index 000000000000..5f3a2914213a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify behavior for the sequence: remote side sends FIN, then we close(). +// Since the remote side (client) closes first, we test our LAST_ACK code path. + +`./defaults.sh` + +// Initialize a server socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) 
= 4 + +// Client closes first. + +.01 < F. 1:1(0) ack 1 win 257 + +0 > . 1:1(0) ack 2 + +// App notices that client closed. + +0 read(4, ..., 1000) = 0 + +// Then we close. + +.01 close(4) = 0 + +0 > F. 1:1(0) ack 2 + +// Client ACKs our FIN. + +.01 < . 2:2(0) ack 2 win 257 + +// Verify that we send RST in response to any incoming segments +// (because the kernel no longer has any record of this socket). + +.01 < . 2:2(0) ack 2 win 257 + +0 > R 2:2(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt new file mode 100644 index 000000000000..643baf3267cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1). +// +`./defaults.sh +sysctl -q net.ipv4.tcp_ecn=1 # fully enabled +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response ++.002 ... 0.004 connect(4, ..., ...) = 0 + + +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + +// Write 1 MSS. ++.002 write(4, ..., 1000) = 1000 +// Send 1 MSS with ect0. + +0 > [ect0] P. 1:1001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt new file mode 100644 index 000000000000..f95b9b3c9fa1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. The large chunk itself should be packetized as +// usual. 
+`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write another 10400B chunk with no coalescing options. + +0 send(4, ..., 10400, MSG_EOR) = 10400 + +// Write a 2KB chunk. This chunk should not be appended to the packets created +// for the previous chunk. + +0 write(4, ..., 2000) = 2000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:20801(10800) ack 1 ++.001 < . 1:1(0) ack 20801 win 514 +// This 2KB packet should be sent alone. + +0 > P. 20801:22801(2000) ack 1 ++.001 < . 1:1(0) ack 22801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt new file mode 100644 index 000000000000..2ff66075288e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. Also, when packets are retransmitted, they +// will not be coalesced into the same skb. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 
1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop> +// TCP should fill the hole but no coalescing should happen, and all +// retransmissions should be sent out as individual packets. + +// Note : This is timeout based retransmit. +// Do not put +0 here or flakes will come back. ++.004~+.008 > P. 12001:12401(400) ack 1 + ++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop> + +0 > P. 12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 ++.001 < . 
1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop> ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt new file mode 100644 index 000000000000..77039c5aac39 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write a 400B chunk with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 + +0 > P. 10801:20801(10000) ack 1 ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 
1:1(0) ack 20801 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt new file mode 100644 index 000000000000..dd5a06250595 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP does not append any data from consequent writes to the tail +// skb created for the chunk even though we have 10 back-to-back small +// writes. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on +// the tail. + +0 write(4, ..., 10400) = 10400 + +// Write 10 400B chunks with no coalescing options. + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 + +0 send(4, ..., 400, MSG_EOR) = 400 +// This chunk should not be appended to the skbs created for the previous chunk. + +0 write(4, ..., 10000) = 10000 + + +0 > P. 1:10001(10000) ack 1 ++.001 < . 1:1(0) ack 10001 win 514 +// Now we have enough room to send out the 2 x 400B packets out. + +0 > P. 10001:10801(800) ack 1 +// The 9 remaining 400B chunks should be sent as individual packets. + +0 > P. 10801:11201(400) ack 1 + +0 > P. 11201:11601(400) ack 1 + +0 > P. 11601:12001(400) ack 1 + +0 > P. 12001:12401(400) ack 1 + +0 > P. 
12401:12801(400) ack 1 + +0 > P. 12801:13201(400) ack 1 + +0 > P. 13201:13601(400) ack 1 + +0 > P. 13601:14001(400) ack 1 + +0 > P. 14001:14401(400) ack 1 +// The last 10KB chunk should be sent separately. + +0 > P. 14401:24401(10000) ack 1 + ++.001 < . 1:1(0) ack 10401 win 514 ++.001 < . 1:1(0) ack 10801 win 514 ++.001 < . 1:1(0) ack 11201 win 514 ++.001 < . 1:1(0) ack 11601 win 514 ++.001 < . 1:1(0) ack 12001 win 514 ++.001 < . 1:1(0) ack 12401 win 514 ++.001 < . 1:1(0) ack 12801 win 514 ++.001 < . 1:1(0) ack 13201 win 514 ++.001 < . 1:1(0) ack 13601 win 514 ++.001 < . 1:1(0) ack 14001 win 514 ++.001 < . 1:1(0) ack 14401 win 514 ++.001 < . 1:1(0) ack 24401 win 514 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt new file mode 100644 index 000000000000..0d3c8077e830 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we test a simple case where in-flight == ssthresh +// all the way through recovery, so during fast recovery we send one segment +// for each segment SACKed/ACKed. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 100ms + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +.01 < . 
1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop> +// Enter fast recovery. + +0 > . 1:1001(1000) ack 1 + +.01 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + +// Write some more, which we will send 1 MSS at a time, +// as in-flight segments are SACKed or ACKed. + +.01 write(4, ..., 7000) = 7000 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop> + +0 > . 12001:13001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop> + +0 > . 13001:14001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop> + +0 > . 14001:15001(1000) ack 1 + + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop> + +0 > . 15001:16001(1000) ack 1 + + +.02 < . 1:1(0) ack 10001 win 320 + +0 > P. 16001:17001(1000) ack 1 +// Leave fast recovery. + +.01 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd +assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh +}% + + +.03 < . 1:1(0) ack 12001 win 320 + +.02 < . 1:1(0) ack 14001 win 320 + +.02 < . 1:1(0) ack 16001 win 320 + +.02 < . 1:1(0) ack 17001 win 320 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt new file mode 100644 index 000000000000..7842a10b6967 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4, and 11 to 16 are dropped. +`./defaults.sh` + +// Establish a connection. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write 20 data segments. + +0 write(4, ..., 20000) = 20000 + +0 > P. 1:10001(10000) ack 1 + +// Receive first DUPACK, entering PRR part + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> + +0 > . 10001:11001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 > . 11001:12001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 +// Enter PRR CRB ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop> + +0 > . 12001:13001(1000) ack 1 ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop> + +0 > . 13001:14001(1000) ack 1 +// Enter PRR slow start + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop> + +0 > P. 14001:16001(2000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 > . 16001:17001(1000) ack 1 +// inflight reaches ssthresh, goes into packet conservation mode ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop> + +0 > . 17001:18001(1000) ack 1 ++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop> + +0 > . 
18001:19001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt new file mode 100644 index 000000000000..b66d7644c3b6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. The sender sends 20 packets. Packet +// 1 to 4 are lost. The sender writes another 10 packets. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 20 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1,2,3,4 + +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop> ++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop> + +0 > . 1:1001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// Receiver ACKs all data. + +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop> + +0 < . 1:1(0) ack 10001 win 320 + +// Writes another 10 packets, which the ssthresh*mss amount +// should be sent right away + +.01 write(4, ..., 10000) = 10000 + +0 > . 
10001:17001(7000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt new file mode 100644 index 000000000000..8e87bfecabb5 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test PRR-slowstart implementation. +// In this variant we verify that the sender uses SACK info on an ACK +// below snd_una. + +// Set up config. +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// RTT 10ms + +.01 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Send 10 data segments. + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// Lost packet 1:1001,4001:5001,7001:8001. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop> + +0 > . 1:1001(1000) ack 1 + ++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop> + +0 > . 4001:7001(3000) ack 1 + + +0 write(4, ..., 10000) = 10000 + +// The following ACK was reordered - delayed so that it arrives with +// an ACK field below snd_una. Here we check that the newly-SACKed +// 2MSS at 5001:7001 cause us to send out 2 more MSS. ++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop> + +0 > . 7001:8001(1000) ack 1 + +0 > . 
10001:11001(1000) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt new file mode 100644 index 000000000000..df49c67645ac --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_client.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the client side. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Connect to the server and enable TCP_INQ. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 setsockopt(3, SOL_TCP, TCP_INQ, [1], 4) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 5792 <mss 1460,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 <nop,nop,TS val 200 ecr 700> + +// We read 1K and we should have 9K ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 1000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=9000}]}, 0) = 1000 +// We read 9K and we should have no further data ready to read. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 9000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 9000 + +// Server sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 <nop,nop,TS val 200 ecr 700> + +// We read 10K and we should have one "fake" byte because the connection is +// closed. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive EOF. + +0 read(3, ..., 2000) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt new file mode 100644 index 000000000000..04a5e2590c62 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_inq_server.pkt @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_INQ and TCP_CM_INQ on the server side. +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + +// Accept the connection and enable TCP_INQ. + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_INQ, [1], 4) = 0 + +// Now we have 10K of data ready on the socket. + +0 < . 1:10001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 10001 + +// We read 2K and we should have 8K ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=8000}]}, 0) = 2000 +// We read 8K and we should have no further data ready to read. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 8000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=0}]}, 0) = 8000 +// Client sends more data and closes the connections. + +0 < F. 10001:20001(10000) ack 1 win 514 + +0 > . 1:1(0) ack 20002 + +// We read 10K and we should have one "fake" byte because the connection is +// closed. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 10000}], + msg_flags=0, + msg_control=[{cmsg_level=SOL_TCP, + cmsg_type=TCP_CM_INQ, + cmsg_data=1}]}, 0) = 10000 +// Now, receive error. + +0 read(3, ..., 2000) = -1 ENOTCONN (Transport endpoint is not connected) diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt new file mode 100644 index 000000000000..96b01eb5b7a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that doesn't support SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. +// We have one packet newly acked (1001:3001 were DUP-ACK'd) +// So we revert state back to Open. Slow start cwnd from 10 to 11 +// and send 11 - 9 = 2 packets + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 
14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt new file mode 100644 index 000000000000..642da51ec3a4 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test RFC 3042 "Limited Transmit": "sending a new data segment in +// response to each of the first two duplicate acknowledgments that +// arrive at the sender". +// This variation tests a receiver that supports SACK. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 320 + +0 accept(3, ..., ...) = 4 + +// Write some data, and send the initial congestion window. + +0 write(4, ..., 15000) = 15000 + +0 > P. 1:10001(10000) ack 1 + +// Limited transmit: on first dupack, send a new data segment. + +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop> + +0 > . 10001:11001(1000) ack 1 + +// Limited transmit: on second dupack, send a new data segment. + +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop> + +0 > . 11001:12001(1000) ack 1 + +// It turned out to be reordering, not loss. + +.01 < . 1:1(0) ack 3001 win 320 + +0 > P. 12001:14001(2000) ack 1 + + +.02 < . 1:1(0) ack 5001 win 320 + +0 > P. 
14001:15001(1000) ack 1 + +// Client gradually ACKs all data. + +.02 < . 1:1(0) ack 7001 win 320 + +.02 < . 1:1(0) ack 9001 win 320 + +.02 < . 1:1(0) ack 11001 win 320 + +.02 < . 1:1(0) ack 13001 win 320 + +.02 < . 1:1(0) ack 15001 win 320 + +// Clean up. + +.17 close(4) = 0 + +0 > F. 15001:15001(0) ack 1 + +.1 < F. 1:1(0) ack 15002 win 257 + +0 > . 15002:15002(0) ack 2 diff --git a/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt new file mode 100644 index 000000000000..25dfef95d3f8 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_md5_md5-only-on-client-ack.pkt @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test what happens when client does not provide MD5 on SYN, +// but then does on the ACK that completes the three-way handshake. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> +// Ooh, weird: client provides MD5 option on the ACK: + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +.01 < . 1:1(0) ack 1 win 514 <md5 000102030405060708090a0b0c0d0e0f,nop,nop> + +// The TCP listener refcount should be 2, but on buggy kernels it can be 0: + +0 `grep " 0A " /proc/net/tcp /proc/net/tcp6 | grep ":1F90"` + +// Now here comes the legit ACK: + +.01 < . 1:1(0) ack 1 win 514 + +// Make sure the connection is OK: + +0 accept(3, ..., ...) 
= 4 + + +.01 write(4, ..., 1000) = 1000 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt new file mode 100644 index 000000000000..7adae7a9ef4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// This is a test inspired by an Android client app using SSL. This +// test verifies using TCP_NODELAY would save application latency +// (Perhaps even better with TCP_NAGLE). +// +`./defaults.sh +ethtool -K tun0 tso off gso off +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7> + +0 > . 1:1(0) ack 1 + +// SSL handshake (resumed session) + +0 write(4, ..., 517) = 517 + +0 > P. 1:518(517) ack 1 + +.1 < . 1:1(0) ack 518 win 229 + + +0 < P. 1:144(143) ack 1 win 229 + +0 > . 518:518(0) ack 144 + +0 read(4, ..., 1000) = 143 + +// Application POST header (51B) and body (2002B) + +0 write(4, ..., 51) = 51 + +0 > P. 518:569(51) ack 144 + +.03 write(4, ..., 2002) = 2002 + +0 > . 569:1543(974) ack 144 + +0 > P. 1543:2517(974) ack 144 +// Without disabling Nagle, this packet will not happen until the remote ACK. + +0 > P. 2517:2571(54) ack 144 + + +.1 < . 
1:1(0) ack 2571 win 229 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt new file mode 100644 index 000000000000..fa9c01813996 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that the MSG_MORE flag correctly corks tiny writes +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Disable Nagle by default on this socket. + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +// Test the basic case: MSG_MORE overrides TCP_NODELAY and enables Nagle. + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40 + +.21~+.215 > P. 1:41(40) ack 1 + +.01 < . 1:1(0) ack 41 win 257 + +// Test unsetting MSG_MORE releases the packet + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100 ++.005 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160 + +.01 sendmsg(4, {msg_name(...)=..., + msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}], + msg_flags=0}, MSG_MORE) = 495 ++.008 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5 + +0 > P. 41:801(760) ack 1 + +.02 < . 1:1(0) ack 801 win 257 + + +// Test >MSS write will unleash MSS packets but hold on to the remaining data. + +.1 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100 + +0 > . 801:3801(3000) ack 1 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50 + + +.01 < . 
1:1(0) ack 2801 win 257 +// Err... we release the remaining data right after the ACK? Note that PUSH is reset + +0 > . 3801:3951(150) ack 1 + +// Test we'll hold on to the subsequent writes when inflight (3801:3951) > 0 ++.001 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1 ++.002 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2 ++.003 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3 ++.004 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4 + +.02 < . 1:1(0) ack 3951 win 257 + +0 > . 3951:3961(10) ack 1 + +.02 < . 1:1(0) ack 3961 win 257 + + +// Test the case where a MSG_MORE send followed by a write flushes the data + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20 + +.05 write(4, ..., 20) = 20 + +0 > P. 3961:4001(40) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt new file mode 100644 index 000000000000..0ddec5f7dc1a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP_CORK and TCP_NODELAY sockopt behavior +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Unset TCP_CORK should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0 + +0 > P. 1:81(80) ack 1 + +.01 < . 
1:1(0) ack 81 win 257 + +// Set TCP_CORK sockopt to hold small packets + +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0 + + +0 write(4, ..., 40) = 40 + +.05 write(4, ..., 40) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > P. 81:161(80) ack 1 + +.01 < . 1:1(0) ack 161 win 257 + +// Set MSG_MORE to hold small packets + +0 send(4, ..., 40, MSG_MORE) = 40 + +.05 send(4, ..., 40, MSG_MORE) = 40 + +// Set TCP_NODELAY sockopt should push pending bytes out + +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + +0 > . 161:241(80) ack 1 + +.01 < . 1:1(0) ack 241 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt new file mode 100644 index 000000000000..310ef31518da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that setsockopt calls that force a route refresh do not +// cause problems matching SACKs with packets in the write queue. +// This variant tests IP_TOS. + +`./defaults.sh` + +// Establish a connection. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0 + +0...0.010 connect(3, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK> + +0 > . 1:1(0) ack 1 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 1:5841(5840) ack 1 + +.01 < . 1:1(0) ack 5841 win 65535 + + +.01 write(3, ..., 5840) = 5840 + +0 > P. 5841:11681(5840) ack 1 + +.01 < . 1:1(0) ack 11681 win 65535 + + +.01 write(3, ..., 14600) = 14600 + +0 > P. 11681:26281(14600) ack 1 + +// Try the socket option that we know can force a route refresh. 
+ +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0 +// Then revert to avoid routing/mangling/etc implications of that setting. + +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0 + +// Verify that we do not retransmit the SACKed segments. + +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop> + +0 > . 13141:16061(2920) ack 1 + +0 > P. 17521:20441(2920) ack 1 + +.01 < . 1:1(0) ack 26281 win 65535 diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt new file mode 100644 index 000000000000..f185e1ac57ea --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests non-FACK SACK with SACKs coming in the order +// 2 6 8 3 9, to test what happens when we get a new SACKed range +// (for packet 3) that is on the right of an existing SACKed range +// (for packet 2). + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + + +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop> ++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop> + +// 3 SACKed packets, so we enter Fast Recovery. + +0 > . 
1:1001(1000) ack 1 + +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }% + +0 %{ assert tcpi_lost == 6, tcpi_lost }% + +// SACK for 3001:4001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. ++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6 + +// SACK for 9001:10001. + +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// ACK for 1:1001 as packets from t=0.303 arrive. ++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 4,tcpi_lost }% + +// ACK for 1:4001 as packets from t=0.310 arrive. ++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop> + +0 %{ assert tcpi_lost == 3,tcpi_lost }% + +// ACK for 1:7001 as packets from t=0.320 arrive. + +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop> + +// ACK for all data as packets from t=0.403 arrive. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt new file mode 100644 index 000000000000..0093b4973934 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. 
+// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 3, and then a SACK for 4. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay the fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> +// RACK reordering timer ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 3001:4001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost +assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001 +}% + +// SACK for 4001:5001. +// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. +// It uses the RFC3517 algorithm to mark 1:3001 lost +// because >=3 higher-sequence packets are SACKed. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ +assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost +}% + +// SACK for 8001:9001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop> + +// SACK for 9001:10001. ++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop> + +0 > . 5001:6001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 
1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt new file mode 100644 index 000000000000..980a832dc81c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb. +// This variant tests the case where we mark packets 0-4 lost, then +// get a SACK for 5, and then a SACK for 6. + +`./defaults.sh` + +// Establish a connection and send 10 MSS. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 1024 + +0 accept(3, ..., ...) = 4 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +// SACK for 7001:8001. Using RACK we delay a fast retransmit. + +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop> ++.027 > . 1:1001(1000) ack 1 + +0 %{ +assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state +assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost +assert tcpi_reordering == 3, tcpi_reordering +}% + +// SACK for 5001:6001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop> + +0 > . 1001:2001(1000) ack 1 + +0 %{ +assert tcpi_lost == 6, tcpi_lost +assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3 +}% + +// SACK for 6001:7001. 
+// This SACK for an adjacent range causes the sender to +// shift the newly-SACKed range onto the previous skb. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop> + +0 > . 2001:3001(1000) ack 1 + +0 %{ assert tcpi_lost == 5, tcpi_lost }% + +// SACK for 8001:9001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop> + +0 > . 3001:4001(1000) ack 1 + +// SACK for 9001:10001. + +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop> + +0 > . 4001:5001(1000) ack 1 + +// To simplify clean-up, say we get an ACK for all data. + +.1 < . 1:1(0) ack 10001 win 257 + +0 %{ +assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state +assert tcpi_unacked == 0, tcpi_unacked +assert tcpi_sacked == 0, tcpi_sacked +assert tcpi_lost == 0, tcpi_lost +assert tcpi_retrans == 0, tcpi_retrans +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt new file mode 100644 index 000000000000..6740859a1360 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0 +// Simplest possible test of open() and then sendfile(). +// We write some zeroes into a file (since packetdrill expects payloads +// to be all zeroes) and then open() the file, then use sendfile() +// and verify that the correct number of zeroes goes out. + +`./defaults.sh +/bin/rm -f /tmp/testfile +/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 open("/tmp/testfile", O_RDONLY) = 5 + +0 sendfile(4, 5, [0], 5) = 5 + +0 > P. 
1:6(5) ack 1 diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt new file mode 100644 index 000000000000..795c476d222d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-1pkt.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every packet. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 1001 win 257 + +0 > P. 10001:12001(2000) ack 1 + + +0 < . 1:1(0) ack 2001 win 257 + +0 > P. 12001:14001(2000) ack 1 + ++.005 < . 1:1(0) ack 3001 win 257 + +0 > P. 14001:16001(2000) ack 1 + + +0 < . 1:1(0) ack 4001 win 257 + +0 > P. 16001:18001(2000) ack 1 + ++.005 < . 1:1(0) ack 5001 win 257 + +0 > P. 18001:20001(2000) ack 1 + + +0 < . 1:1(0) ack 6001 win 257 + +0 > P. 20001:22001(2000) ack 1 + ++.005 < . 1:1(0) ack 7001 win 257 + +0 > P. 22001:24001(2000) ack 1 + + +0 < . 1:1(0) ack 8001 win 257 + +0 > P. 24001:26001(2000) ack 1 + ++.005 < . 1:1(0) ack 9001 win 257 + +0 > P. 26001:28001(2000) ack 1 + + +0 < . 1:1(0) ack 10001 win 257 + +0 > P. 
28001:30001(2000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt new file mode 100644 index 000000000000..9212ae1fd0f2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-5pkt.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, and not big enough to bump up cwnd. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 5 packets. + +0 write(4, ..., 5000) = 5000 + +0 > P. 1:5001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 
1:1(0) ack 5001 win 257 + +0 %{ assert tcpi_snd_cwnd == 10, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt new file mode 100644 index 000000000000..416c901ddf51 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt-send-6pkt.pkt @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when an outstanding flight of packets is +// less than the current cwnd, but still big enough that in slow +// start we want to increase our cwnd a little. +// +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +// Only send 6 packets. + +0 write(4, ..., 6000) = 6000 + +0 > P. 1:6001(6000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% + + +0 < . 
1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt new file mode 100644 index 000000000000..a894b7d4559c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-2pkt.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 > P. 10001:14001(4000) ack 1 + ++.005 < . 1:1(0) ack 4001 win 257 + +0 > P. 14001:18001(4000) ack 1 + ++.005 < . 1:1(0) ack 6001 win 257 + +0 > P. 18001:22001(4000) ack 1 + ++.005 < . 1:1(0) ack 8001 win 257 + +0 > P. 22001:26001(4000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 
26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt new file mode 100644 index 000000000000..065fae9e9abd --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-ack-per-4pkt.pkt @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow. +// In this variant, the receiver sends one ACK per 4 packets. + +// Set up config. To keep things simple, disable the +// mechanism that defers sending in order to send bigger TSO packets. +`./defaults.sh +sysctl -q net.ipv4.tcp_tso_win_divisor=100` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 30000) = 30000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.11 < . 1:1(0) ack 4001 win 257 + +0 > P. 10001:18001(8000) ack 1 + + +.01 < . 1:1(0) ack 8001 win 257 + +0 > P. 18001:26001(8000) ack 1 + ++.005 < . 1:1(0) ack 10001 win 257 + +0 > P. 
26001:30001(4000) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt new file mode 100644 index 000000000000..11b213be1138 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-idle.pkt @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after idle +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 511 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +2 write(4, ..., 20000) = 20000 +// If slow start after idle works properly, we should send 5 MSS here (cwnd/2) + +0 > P. 
26001:31001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt new file mode 100644 index 000000000000..577ed8c8852c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-after-win-update.pkt @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start after window update +// This test expects tso size to be at least initial cwnd * mss + +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_slow_start_after_idle=1 \ + /proc/sys/net/ipv4/tcp_min_tso_segs=10` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 511 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 26000) = 26000 + +0 > P. 1:5001(5000) ack 1 + +0 > P. 5001:10001(5000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + + +.1 < . 1:1(0) ack 10001 win 511 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% + +0 > P. 10001:20001(10000) ack 1 + +0 > P. 20001:26001(6000) ack 1 + + +.1 < . 1:1(0) ack 26001 win 0 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +0 write(4, ..., 20000) = 20000 +// 1st win0 probe ++.3~+.310 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 2nd win0 probe ++.6~+.620 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + +// 3rd win0 probe ++1.2~+1.240 > . 26000:26000(0) ack 1 + +0 %{ assert tcpi_snd_cwnd == 36, tcpi_snd_cwnd }% + + +.9 < . 1:1(0) ack 26001 win 511 + +0 > P. 
26001:31001(5000) ack 1 + +// Reset sysctls +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt new file mode 100644 index 000000000000..869f32c35a2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited-9-packets-out.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we don't fully use our cwnd but instead +// send just 9 packets, then cwnd should grow to twice that +// value, or 18 packets. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 9000) = 9000 + +0 > P. 1:9001(9000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 
1:1(0) ack 9001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt new file mode 100644 index 000000000000..0f77b7955db6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-app-limited.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when application-limited: in this case, +// with IW10, if we send exactly 10 packets then cwnd should grow to 20. + +// Set up config. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 10000) = 10000 + +0 > P. 1:10001(10000) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2001 win 257 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 4001 win 257 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 6001 win 257 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + ++.005 < . 1:1(0) ack 8001 win 257 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% + ++.005 < . 
1:1(0) ack 10001 win 257 + +0 %{ assert tcpi_snd_cwnd == 20, tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt new file mode 100644 index 000000000000..7e9c83d617c2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_slow_start_slow-start-fq-ack-per-2pkt.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test of slow start when not application-limited, so that +// the cwnd continues to grow, even if TSQ triggers. +// In this variant, the receiver ACKs every other packet, +// approximating standard delayed ACKs. + +// Note we use FQ/pacing to check if TCP Small Queues is not hurting + +`./defaults.sh +tc qdisc replace dev tun0 root fq +sysctl -q net/ipv4/tcp_pacing_ss_ratio=200 +sysctl -e -q net.ipv4.tcp_min_tso_segs=2` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +.1 < S 0:0(0) win 32792 <mss 1460,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.1 < . 1:1(0) ack 1 win 500 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [200000], 4) = 0 + + +0 write(4, ..., 40000) = 40000 +// This might change if we cook the initial packet with 10 MSS. + +0 > P. 1:2921(2920) ack 1 + +0 > P. 2921:5841(2920) ack 1 + +0 > P. 5841:8761(2920) ack 1 + +0 > P. 8761:11681(2920) ack 1 + +0 > P. 11681:14601(2920) ack 1 + +0 %{ assert tcpi_snd_cwnd == 10, tcpi_snd_cwnd }% + ++.105 < . 1:1(0) ack 2921 win 500 + +0 %{ assert tcpi_snd_cwnd == 12, tcpi_snd_cwnd }% + +// Note: after this commit : "net_sched: sch_fq: account for schedule/timers drifts" +// FQ notices that this packet missed the 'time to send next packet' computed +// when prior packet (11681:14601(2920)) was sent. 
+// So FQ will allow following packet to be sent a bit earlier (quantum/2) +// (FQ commit allows an application/cwnd limited flow to get at most quantum/2 extra credit) + +0 > P. 14601:17521(2920) ack 1 + ++.003 < . 1:1(0) ack 5841 win 500 + +0 %{ assert tcpi_snd_cwnd == 14, tcpi_snd_cwnd }% + ++.001 > P. 17521:20441(2920) ack 1 + ++.001 < . 1:1(0) ack 8761 win 500 + +0 %{ assert tcpi_snd_cwnd == 16, tcpi_snd_cwnd }% + +// remaining packets are delivered at a constant rate. ++.007 > P. 20441:23361(2920) ack 1 + ++.002 < . 1:1(0) ack 11681 win 500 + +0 %{ assert tcpi_snd_cwnd == 18, tcpi_snd_cwnd }% ++.001 < . 1:1(0) ack 14601 win 500 + ++.004 > P. 23361:26281(2920) ack 1 + ++.007 > P. 26281:29201(2920) ack 1 + + +0 %{ assert tcpi_snd_cwnd == 20, 'cwnd=%d' % tcpi_snd_cwnd }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt new file mode 100644 index 000000000000..0cbd43253236 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize a server socket + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Connection should get accepted + +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <...> + +0 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + + +0 pipe([5, 6]) = 0 + +0 < U. 
1:101(100) ack 1 win 257 urg 100 + +0 splice(4, NULL, 6, NULL, 99, 0) = 99 + +0 splice(4, NULL, 6, NULL, 1, 0) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..8940726a3ec2 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero +// buffer length. +`./defaults.sh +./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0` + +// Cache warmup: send a Fast Open cookie request + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress) ++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop> ++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop> ++0 > . 1:1(0) ack 1 ++0 close(3) = 0 ++0 > F. 1:1(0) ack 1 ++0 < F. 1:1(0) ack 2 win 92 ++0 > . 2:2(0) ack 2 + +// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 ++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(4) = 0 + +// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 ++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(5, ..., ...) = 0 ++0 sendto(5, NULL, 1, 0, ..., ...) = -1 ++0 close(5) = 0 + +// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6 ++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0 ++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0 ++0 connect(6, ..., ...) 
= 0 ++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1 ++0 close(6) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt new file mode 100644 index 000000000000..b2b2cdf27e20 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we correctly skip zero-length IOVs. +`./defaults.sh` + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 257 + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}], + msg_flags=0}, 0) = 60 + +0 > P. 1:61(60) ack 1 + +.01 < . 1:1(0) ack 61 win 257 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}], + msg_flags=0}, MSG_ZEROCOPY) = 0 + + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}], + msg_flags=0}, MSG_ZEROCOPY) = 60 + +0 > P. 61:121(60) ack 1 + +.01 < . 
1:1(0) ack 121 win 257 diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt new file mode 100644 index 000000000000..59f5903f285c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test kernel behavior with NULL as buffer pointer + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.2 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + + +0 write(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) + + +0 < . 1:1001(1000) ack 1 win 200 + +0 read(4, NULL, 1000) = -1 EFAULT (Bad address) + +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address) + +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address) diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt new file mode 100644 index 000000000000..d7fdb43a8e89 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tcpi_last_data_recv for active session +`./defaults.sh` + +// Create a socket and set it to non-blocking. ++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 ++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) ++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + ++0 connect(3, ..., ...) 
= -1 EINPROGRESS (Operation now in progress) ++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> ++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> ++0 > . 1:1(0) ack 1 + ++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }% + ++0 < . 1:1001(1000) ack 1 win 300 ++0 > . 1:1(0) ack 1001 + ++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt new file mode 100644 index 000000000000..a9bcd46f6cb6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test rwnd limited time in tcp_info for client side. + +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +// Server advertises 0 receive window. + +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK> + + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +// Make sure that initial rwnd limited time is 0. + +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }% + +// Receive window limited time starts here. + +0 write(3, ..., 1000) = 1000 + +// Check that rwnd limited time in tcp_info is around 0.1s. + +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }% + +// Server opens the receive window. + +.1 < . 1:1(0) ack 1 win 2000 + +// Check that rwnd limited time in tcp_info is around 0.2s. + +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }% + + +0 > P. 
1:1001(1000) ack 1 + +// Server advertises a very small receive window. + +.03 < . 1:1(0) ack 1001 win 10 + +// Receive window limited time starts again. + +0 write(3, ..., 1000) = 1000 + +// Server opens the receive window again. + +.1 < . 1:1(0) ack 1001 win 2000 +// Check that rwnd limited time in tcp_info is around 0.3s +// and busy time is 0.3 + 0.03 (server opened small window temporarily). + +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\ + assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\ +}% + + +0 > P. 1001:2001(1000) ack 1 + +.02 < . 1:1(0) ack 2001 win 2000 + +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt new file mode 100644 index 000000000000..f0de2acd0f8e --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test send-buffer-limited time in tcp_info for client side. +`./defaults.sh` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8> + +0 > . 1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0 + +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0 + + +.09...0.14 write(3, ..., 150000) = 150000 + + +.01 < . 1:1(0) ack 10001 win 10000 + + +.01 < . 
1:1(0) ack 30001 win 10000 + +// cwnd goes from 40(60KB) to 80(120KB), and we hit the tiny sndbuf limit of 10KB + +.01 < . 1:1(0) ack 70001 win 10000 + + +.02 < . 1:1(0) ack 95001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \ + assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% + +// This ack frees up enough buffer so we are no longer +// buffer limited (socket flag SOCK_NOSPACE is cleared) + +.02 < . 1:1(0) ack 150001 win 10000 + +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\ + assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\ + assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }% diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt new file mode 100644 index 000000000000..2087ec0c746a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that tx timestamping sends timestamps only for +// the last byte of each sendmsg. +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK> + +0 > . 
1:1(0) ack 1 + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking + + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + + +0 write(3, ..., 11000) = 11000 + +0 > P. 1:10001(10000) ack 1 + +.01 < . 1:1(0) ack 10001 win 4000 + +0 > P. 10001:11001(1000) ack 1 + +.01 < . 1:1(0) ack 11001 win 4000 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. +// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after the SYN is acked. So, we expect the last byte of the first +// chunk to have a timestamp key of 10999 (i.e., 11000 - 1). + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the last byte should be received almost immediately +// once 10001 is acked at t=20ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the last byte should be received at t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=10999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt new file mode 100644 index 000000000000..876024a31110 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt @@ -0,0 +1,91 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for partial writes (IPv4). +`./defaults.sh +` + +// Create a socket and set it to non-blocking. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR) + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +// Establish connection and verify that there was no error. + +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8> + +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7> + +0 > . 
1:1(0) ack 1 <nop,nop,TS val 200 ecr 700> + +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0 + + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// We have a partial write. + +0 write(3, ..., 10000) = 2964 + +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700> + +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700> + +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200> + +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800> + +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200> + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after the first ack at t=20ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after the first ack at t=20ms. 
+ +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at +// t=30ms. + +0 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=2963}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt new file mode 100644 index 000000000000..84d94780e6be --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test tx timestamping for server-side (IPv4). +`./defaults.sh +` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 514 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING, + [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_ID], 4) = 0 + +// Write two 2KB chunks. 
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...) +// is called after the SYN is acked. So, we expect the last byte of the first +// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and +// 3999 (i.e., 4000 - 1) respectively. + +0 write(4, ..., 2000) = 2000 + +0 write(4, ..., 2000) = 2000 + +0 > P. 1:2001(2000) ack 1 + +0 > P. 2001:4001(2000) ack 1 + +.01 < . 1:1(0) ack 2001 win 514 + +.01 < . 1:1(0) ack 4001 win 514 + +// Make sure that internal TCP timestamps are not overwritten and we have sane +// RTT measurement. + +0 %{ +assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt +}% + +// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the first chunk should be received almost immediately +// after write at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately +// after that at t=10ms. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SCHED, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_SND for the second chunk should be received almost immediately +// after that at t=10ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=10000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_SND, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms. + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=20000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=1999}} + ]}, MSG_ERRQUEUE) = 0 +// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms. 
+ +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE|MSG_TRUNC, + msg_control=[ + {cmsg_level=SOL_SOCKET, + cmsg_type=SCM_TIMESTAMPING, + cmsg_data={scm_sec=0,scm_nsec=30000000}}, + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=ENOMSG, + ee_origin=SO_EE_ORIGIN_TIMESTAMPING, + ee_type=0, + ee_code=0, + ee_info=SCM_TSTAMP_ACK, + ee_data=3999}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt new file mode 100644 index 000000000000..e61424a7bd0a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send FIN packet with correct TSval +--tcp_ts_tick_usecs=1000 +--tolerance_usecs=7000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +1 close(4) = 0 +// Check that FIN TSval is updated properly, one second has passed since last sent packet. + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt new file mode 100644 index 000000000000..174ce9a1bfc0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we reject TS val updates on a packet with invalid ACK sequence + +`./defaults.sh +` + +// Create a socket. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + +// bad packet with high tsval (its ACK sequence is above our sndnxt) + +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100> + + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt new file mode 100644 index 000000000000..2e3b3bb7493a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +// Test that we send RST packet with correct TSval +--tcp_ts_tick_usecs=1000 + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0> + +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100> + +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100> + +0 accept(3, ..., ...) = 4 + + +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100> + +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201> + + +1 close(4) = 0 +// Check that RST TSval is updated properly, one second has passed since last sent packet. + +0 > R. 
1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201> diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt new file mode 100644 index 000000000000..183051ba0cae --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + + +0 < S 0:0(0) win 0 <mss 1460> + +0 > S. 0:0(0) ack 1 <mss 1460> + + +.1 < . 1:1(0) ack 1 win 65530 + +0 accept(3, ..., ...) = 4 + + +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 write(4, ..., 24) = 24 + +0 > P. 1:25(24) ack 1 + +.1 < . 1:1(0) ack 25 win 65530 + +0 %{ assert tcpi_probes == 0, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +// install a qdisc dropping all packets + +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0` + +0 write(4, ..., 24) = 24 + // When qdisc is congested we retry every 500ms + // (TCP_RESOURCE_PROBE_INTERVAL) and therefore + // we retry 6 times before hitting 3s timeout. 
+ // First verify that the connection is alive: ++3.250 write(4, ..., 24) = 24 + // Now verify that shortly after that the socket is dead: + +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out) + + +0 %{ assert tcpi_probes == 6, tcpi_probes; \ + assert tcpi_backoff == 0, tcpi_backoff }% + +0 close(4) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt new file mode 100644 index 000000000000..2efe02bfba9c --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +`./defaults.sh` + +// Initialize connection + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.1 < . 1:1(0) ack 1 win 32792 + + + +0 accept(3, ..., ...) = 4 + +// Okay, we received nothing, and decide to close this idle socket. +// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth +// trying hard to cleanly close this flow, at the price of keeping +// a TCP structure in kernel for about 1 minute ! + +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0 + +0 close(4) = 0 + + +0 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.3~+.400 > F. 1:1(0) ack 1 + +.6~+.800 > F. 1:1(0) ack 1 + +// We finally receive something from the peer, but it is way too late +// Our socket vanished because TCP_USER_TIMEOUT was really small + +0 < . 
1:2(1) ack 1 win 32792 + +0 > R 1:1(0) diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt new file mode 100644 index 000000000000..8bd60226ccfc --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// Verify that established connections drop a segment without the ACK flag set. + +`./defaults.sh` + +// Create a socket. + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +// Establish a connection. + +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK> + +.01 < . 1:1(0) ack 1 win 20000 + +0 accept(3, ..., ...) = 4 + +// Receive a segment with no flags set, verify that it's not enqueued. + +.01 < - 1:1001(1000) win 20000 + +0 ioctl(4, SIOCINQ, [0]) = 0 + +// Receive a segment with ACK flag set, verify that it is enqueued. + +.01 < . 1:1001(1000) ack 1 win 20000 + +0 ioctl(4, SIOCINQ, [1000]) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt new file mode 100644 index 000000000000..a82c8899d36b --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_basic.pkt @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +// basic zerocopy test: +// +// send a packet with MSG_ZEROCOPY and receive the notification ID +// repeat and verify IDs are consecutive + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 
0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt new file mode 100644 index 000000000000..c01915e7f4a1 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_batch.pkt @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// batch zerocopy test: +// +// send multiple packets, then read one range of all notifications. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_MARK, [666], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 
1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt new file mode 100644 index 000000000000..6509882932e9 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_client.pkt @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +// Minimal client-side zerocopy test + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 
1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt new file mode 100644 index 000000000000..2cd78755cb2a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_closed.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// send with MSG_ZEROCOPY on a non-established socket +// +// verify that a send in state TCP_CLOSE correctly aborts the zerocopy +// operation, specifically it does not increment the zerocopy counter. +// +// First send on a closed socket and wait for (absent) notification. +// Then connect and send and verify that notification nr. is zero. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4 + +0 setsockopt(4, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = -1 EPIPE (Broken pipe) + + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +0...0 connect(4, ..., ...) = 0 + + +0 > S 0:0(0) <mss 1460,sackOK,TS val 0 ecr 0,nop,wscale 8> + +0 < S. 0:0(0) ack 1 win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > . 1:1(0) ack 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 
1:1(0) ack 4001 win 257 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt new file mode 100644 index 000000000000..7671c20e01cf --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_edge.pkt @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, {events=EPOLLOUT|EPOLLET, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. 
+ +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt new file mode 100644 index 000000000000..fadc480fdb7f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_exclusive.pkt @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// EPOLLERR is known to be not edge-triggered unlike EPOLLIN and EPOLLOUT but +// it is not level-triggered either. this test verifies that the same behavior is +// maintained when we have EPOLLEXCLUSIVE. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLET is set. send another packet with +// MSG_ZEROCOPY. confirm that EPOLLERR is correctly fired again only once. +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) 
= 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLEXCLUSIVE, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLOUT, fd=4}, 1, 0) = 1 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive only one EPOLLERR for the third send above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt new file mode 100644 index 000000000000..5bfa0d1d2f4a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_epoll_oneshot.pkt @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0 +// epoll zerocopy test: +// +// This is a test to confirm that EPOLLERR is only fired once for an FD when +// EPOLLONESHOT is set. +// +// fire two sends with MSG_ZEROCOPY and receive the acks. confirm that EPOLLERR +// is correctly fired only once, when EPOLLONESHOT is set. send another packet +// with MSG_ZEROCOPY. confirm that EPOLLERR is not fired. Rearm the FD and +// confirm that EPOLLERR is correctly set. 
+`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 epoll_create(1) = 5 + +0 epoll_ctl(5, EPOLL_CTL_ADD, 4, + {events=EPOLLOUT|EPOLLET|EPOLLONESHOT, fd=4}) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 1:4001(4000) ack 1 + +0 < . 1:1(0) ack 4001 win 257 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 4001:8001(4000) ack 1 + +0 < . 1:1(0) ack 8001 win 257 + +// receive only one EPOLLERR for the two sends above. + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 send(4, ..., 4000, MSG_ZEROCOPY) = 4000 + +0 > P. 8001:12001(4000) ack 1 + +0 < . 1:1(0) ack 12001 win 257 + +// receive no EPOLLERR for the third send above. + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + +// rearm the FD and verify the EPOLLERR is fired again. 
+ +0 epoll_ctl(5, EPOLL_CTL_MOD, 4, {events=EPOLLOUT|EPOLLONESHOT, fd=4}) = 0 + +0 epoll_wait(5, {events=EPOLLERR|EPOLLOUT, fd=4}, 1, 0) = 1 + +0 epoll_wait(5, {events=0, ptr=0}, 1, 0) = 0 + + +0 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=2}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt new file mode 100644 index 000000000000..4a73bbf46961 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-client.pkt @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen client zerocopy test: +// +// send data with MSG_FASTOPEN | MSG_ZEROCOPY and verify that the +// kernel returns the notification ID. +// +// Fastopen requires a stored cookie. Create two sockets. The first +// one will have no data in the initial send. On return 0 the +// zerocopy notification counter is not incremented. Verify this too. + +`./defaults.sh` + +// Send a FastOpen request, no cookie yet so no data in SYN + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(3, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = -1 EINPROGRESS (Operation now in progress) + +0 > S 0:0(0) <mss 1460,sackOK,TS val 1000 ecr 0,nop,wscale 8,FO,nop,nop> + +.01 < S. 123:123(0) ack 1 win 14600 <mss 940,TS val 2000 ecr 1000,sackOK,nop,wscale 6, FO abcd1234,nop,nop> + +0 > . 
1:1(0) ack 1 <nop,nop,TS val 1001 ecr 2000> + +// Read from error queue: no zerocopy notification + +1 recvmsg(3, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[]}, MSG_ERRQUEUE) = -1 EAGAIN (Resource temporarily unavailable) + + +.01 close(3) = 0 + +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1002 ecr 2000> + +.01 < F. 1:1(0) ack 2 win 92 <nop,nop,TS val 2001 ecr 1002> + +0 > . 2:2(0) ack 2 <nop,nop,TS val 1003 ecr 2001> + +// Send another Fastopen request, now SYN will have data + +.07 `sysctl -q net.ipv4.tcp_timestamps=0` + +.1 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5 + +0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0 + +0 setsockopt(5, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 sendto(5, ..., 500, MSG_FASTOPEN|MSG_ZEROCOPY, ..., ...) = 500 + +0 > S 0:500(500) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO abcd1234,nop,nop> + +.05 < S. 5678:5678(0) ack 501 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6> + +0 > . 501:501(0) ack 1 + +// Read from error queue: now has first zerocopy notification + +0.5 recvmsg(5, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt new file mode 100644 index 000000000000..36086c5877ce --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_fastopen-server.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 +// Fastopen server zerocopy test: +// +// send data with MSG_ZEROCOPY on a socket accepted via TCP Fast Open and +// verify that the kernel returns the notification ID. + +`./defaults.sh + ./set_sysctls.py /proc/sys/net/ipv4/tcp_fastopen=0x207` + +// Set up a TFO server listening socket. 
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +.1 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + +0 setsockopt(3, SOL_TCP, TCP_FASTOPEN, [2], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +// Client sends a SYN with data. + +.1 < S 0:1000(1000) win 32792 <mss 1460,sackOK,nop,nop> + +0 > S. 0:0(0) ack 1001 <mss 1460,nop,nop,sackOK> + +// Server accepts and replies with data. ++.005 accept(3, ..., ...) = 4 + +0 read(4, ..., 1024) = 1000 + +0 sendto(4, ..., 1000, MSG_ZEROCOPY, ..., ...) = 1000 + +0 > P. 1:1001(1000) ack 1001 + +.05 < . 1001:1001(0) ack 1001 win 32792 + +// Read from error queue: now has first zerocopy notification + +0.1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + +`/tmp/sysctl_restore_${PPID}.sh` diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt new file mode 100644 index 000000000000..672f817faca0 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_maxfrags.pkt @@ -0,0 +1,118 @@ +// SPDX-License-Identifier: GPL-2.0 +// tcp_MAX_SKB_FRAGS test +// +// Verify that sending an iovec of tcp_MAX_SKB_FRAGS + 1 elements will +// 1) fit in a single packet without zerocopy +// 2) spill over into a second packet with zerocopy, +// because each iovec element becomes a frag +// 3) the PSH bit is set on an skb when it runs out of fragments + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + + // Each pinned zerocopy page is fully accounted to skb->truesize. 
+ // This test generates a worst case packet with each frag storing + // one byte, but increasing truesize with a page (64KB on PPC). + +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [2000000], 4) = 0 + + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send an iov of 18 elements: just becomes a linear skb + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, 0) = 18 + + +0 > P. 1:19(18) ack 1 + +0 < . 1:1(0) ack 19 win 257 + + // send a zerocopy iov of 18 elements: + +1 sendmsg(4, {msg_name(...)=..., + msg_iov(18)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 18 + + // verify that it is split in one skb of 17 frags + 1 of 1 frag + // verify that both have the PSH bit set + +0 > P. 19:36(17) ack 1 + +0 < . 1:1(0) ack 36 win 257 + + +0 > P. 36:37(1) ack 1 + +0 < . 
1:1(0) ack 37 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send a zerocopy iov of 64 elements: + +0 sendmsg(4, {msg_name(...)=..., + msg_iov(64)=[{..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}, + {..., 1}, {..., 1}, {..., 1}, {..., 1}], + msg_flags=0}, MSG_ZEROCOPY) = 64 + + // verify that it is split in skbs with 17 frags + +0 > P. 37:54(17) ack 1 + +0 < . 1:1(0) ack 54 win 257 + + +0 > P. 54:71(17) ack 1 + +0 < . 1:1(0) ack 71 win 257 + + +0 > P. 71:88(17) ack 1 + +0 < . 1:1(0) ack 88 win 257 + + +0 > P. 88:101(13) ack 1 + +0 < . 
1:1(0) ack 101 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 diff --git a/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt new file mode 100644 index 000000000000..a9a1ac0aea4f --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_zerocopy_small.pkt @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0 +// small packet zerocopy test: +// +// verify that SO_EE_CODE_ZEROCOPY_COPIED is set on zerocopy +// packets of all sizes, including the smallest payload, 1B. + +`./defaults.sh` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8> + +0 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + // send 1B + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 1:2(1) ack 1 + +0 < . 1:1(0) ack 2 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=0, + ee_data=0}} + ]}, MSG_ERRQUEUE) = 0 + + // send 1B again + +0 send(4, ..., 1, MSG_ZEROCOPY) = 1 + +0 > P. 2:3(1) ack 1 + +0 < . 
1:1(0) ack 3 win 257 + + +1 recvmsg(4, {msg_name(...)=..., + msg_iov(1)=[{...,0}], + msg_flags=MSG_ERRQUEUE, + msg_control=[ + {cmsg_level=CMSG_LEVEL_IP, + cmsg_type=CMSG_TYPE_RECVERR, + cmsg_data={ee_errno=0, + ee_origin=SO_EE_ORIGIN_ZEROCOPY, + ee_type=0, + ee_code=SO_EE_CODE_ZEROCOPY_COPIED, + ee_info=1, + ee_data=1}} + ]}, MSG_ERRQUEUE) = 0 |