diff options
| -rw-r--r-- | Documentation/networking/net_cachelines/snmp.rst | 1 | ||||
| -rw-r--r-- | include/net/dropreason-core.h | 9 | ||||
| -rw-r--r-- | include/net/sock.h | 2 | ||||
| -rw-r--r-- | include/uapi/linux/snmp.h | 1 | ||||
| -rw-r--r-- | net/ipv4/proc.c | 1 | ||||
| -rw-r--r-- | net/ipv4/tcp_input.c | 48 | ||||
| -rw-r--r-- | tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt | 27 | ||||
| -rw-r--r-- | tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt | 44 | ||||
| -rw-r--r-- | tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt | 33 |
9 files changed, 152 insertions, 14 deletions
diff --git a/Documentation/networking/net_cachelines/snmp.rst b/Documentation/networking/net_cachelines/snmp.rst index bd44b3eebbef..bce4eb35ec48 100644 --- a/Documentation/networking/net_cachelines/snmp.rst +++ b/Documentation/networking/net_cachelines/snmp.rst @@ -36,6 +36,7 @@ unsigned_long LINUX_MIB_TIMEWAITRECYCLED unsigned_long LINUX_MIB_TIMEWAITKILLED unsigned_long LINUX_MIB_PAWSACTIVEREJECTED unsigned_long LINUX_MIB_PAWSESTABREJECTED +unsigned_long LINUX_MIB_BEYOND_WINDOW unsigned_long LINUX_MIB_TSECR_REJECTED unsigned_long LINUX_MIB_PAWS_OLD_ACK unsigned_long LINUX_MIB_PAWS_TW_REJECTED diff --git a/include/net/dropreason-core.h b/include/net/dropreason-core.h index b9e78290269e..229bb1826f2a 100644 --- a/include/net/dropreason-core.h +++ b/include/net/dropreason-core.h @@ -45,6 +45,7 @@ FN(TCP_LISTEN_OVERFLOW) \ FN(TCP_OLD_SEQUENCE) \ FN(TCP_INVALID_SEQUENCE) \ + FN(TCP_INVALID_END_SEQUENCE) \ FN(TCP_INVALID_ACK_SEQUENCE) \ FN(TCP_RESET) \ FN(TCP_INVALID_SYN) \ @@ -303,9 +304,15 @@ enum skb_drop_reason { SKB_DROP_REASON_TCP_LISTEN_OVERFLOW, /** @SKB_DROP_REASON_TCP_OLD_SEQUENCE: Old SEQ field (duplicate packet) */ SKB_DROP_REASON_TCP_OLD_SEQUENCE, - /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field */ + /** @SKB_DROP_REASON_TCP_INVALID_SEQUENCE: Not acceptable SEQ field. */ SKB_DROP_REASON_TCP_INVALID_SEQUENCE, /** + * @SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE: + * Not acceptable END_SEQ field. + * Corresponds to LINUX_MIB_BEYOND_WINDOW. + */ + SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE, + /** * @SKB_DROP_REASON_TCP_INVALID_ACK_SEQUENCE: Not acceptable ACK SEQ * field because ack sequence is not in the window between snd_una * and snd_nxt diff --git a/include/net/sock.h b/include/net/sock.h index 0f2443d4ec58..c8a4b283df6f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1553,7 +1553,7 @@ __sk_rmem_schedule(struct sock *sk, int size, bool pfmemalloc) } static inline bool -sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, int size) +sk_rmem_schedule(struct sock *sk, const struct sk_buff *skb, int size) { return __sk_rmem_schedule(sk, size, skb_pfmemalloc(skb)); } diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index 1d234d7e1892..49f5640092a0 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -186,6 +186,7 @@ enum LINUX_MIB_TIMEWAITKILLED, /* TimeWaitKilled */ LINUX_MIB_PAWSACTIVEREJECTED, /* PAWSActiveRejected */ LINUX_MIB_PAWSESTABREJECTED, /* PAWSEstabRejected */ + LINUX_MIB_BEYOND_WINDOW, /* BeyondWindow */ LINUX_MIB_TSECRREJECTED, /* TSEcrRejected */ LINUX_MIB_PAWS_OLD_ACK, /* PAWSOldAck */ LINUX_MIB_PAWS_TW_REJECTED, /* PAWSTimewait */ diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index ea2f01584379..65b0d0ab0084 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -189,6 +189,7 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TWKilled", LINUX_MIB_TIMEWAITKILLED), SNMP_MIB_ITEM("PAWSActive", LINUX_MIB_PAWSACTIVEREJECTED), SNMP_MIB_ITEM("PAWSEstab", LINUX_MIB_PAWSESTABREJECTED), + SNMP_MIB_ITEM("BeyondWindow", LINUX_MIB_BEYOND_WINDOW), SNMP_MIB_ITEM("TSEcrRejected", LINUX_MIB_TSECRREJECTED), SNMP_MIB_ITEM("PAWSOldAck", LINUX_MIB_PAWS_OLD_ACK), SNMP_MIB_ITEM("PAWSTimewait", LINUX_MIB_PAWS_TW_REJECTED), diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9b03c44c12b8..9c5baace4b7b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4391,14 +4391,22 @@ static enum skb_drop_reason tcp_disordered_ack_check(const struct sock *sk, * (borrowed from freebsd) */ -static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp, +static enum skb_drop_reason tcp_sequence(const struct sock *sk, u32 seq, u32 end_seq) { + const struct tcp_sock *tp = tcp_sk(sk); + if (before(end_seq, tp->rcv_wup)) return SKB_DROP_REASON_TCP_OLD_SEQUENCE; - if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) - return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; + if (after(end_seq, tp->rcv_nxt + tcp_receive_window(tp))) { + if (after(seq, tp->rcv_nxt + tcp_receive_window(tp))) + return SKB_DROP_REASON_TCP_INVALID_SEQUENCE; + + /* Only accept this packet if receive queue is empty. */ + if (skb_queue_len(&sk->sk_receive_queue)) + return SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE; + } return SKB_NOT_DROPPED_YET; } @@ -4880,10 +4888,20 @@ static void tcp_ofo_queue(struct sock *sk) static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb); static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb); -static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, +/* Check if this incoming skb can be added to socket receive queues + * while satisfying sk->sk_rcvbuf limit. + */ +static bool tcp_can_ingest(const struct sock *sk, const struct sk_buff *skb) +{ + unsigned int new_mem = atomic_read(&sk->sk_rmem_alloc) + skb->truesize; + + return new_mem <= sk->sk_rcvbuf; +} + +static int tcp_try_rmem_schedule(struct sock *sk, const struct sk_buff *skb, unsigned int size) { - if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf || + if (!tcp_can_ingest(sk, skb) || !sk_rmem_schedule(sk, skb, size)) { if (tcp_prune_queue(sk, skb) < 0) @@ -4915,6 +4933,7 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) return; } + tcp_measure_rcv_mss(sk, skb); /* Disable header prediction. */ tp->pred_flags = 0; inet_csk_schedule_ack(sk); @@ -5498,7 +5517,7 @@ static bool tcp_prune_ofo_queue(struct sock *sk, const struct sk_buff *in_skb) tcp_drop_reason(sk, skb, SKB_DROP_REASON_TCP_OFO_QUEUE_PRUNE); tp->ooo_last_skb = rb_to_skb(prev); if (!prev || goal <= 0) { - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && + if (tcp_can_ingest(sk, skb) && !tcp_under_memory_pressure(sk)) break; goal = sk->sk_rcvbuf >> 3; @@ -5532,12 +5551,12 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_PRUNECALLED); - if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf) + if (!tcp_can_ingest(sk, in_skb)) tcp_clamp_window(sk); else if (tcp_under_memory_pressure(sk)) tcp_adjust_rcv_ssthresh(sk); - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + if (tcp_can_ingest(sk, in_skb)) return 0; tcp_collapse_ofo_queue(sk); @@ -5547,7 +5566,7 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) NULL, tp->copied_seq, tp->rcv_nxt); - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + if (tcp_can_ingest(sk, in_skb)) return 0; /* Collapsing did not help, destructive actions follow. @@ -5555,7 +5574,7 @@ static int tcp_prune_queue(struct sock *sk, const struct sk_buff *in_skb) tcp_prune_ofo_queue(sk, in_skb); - if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf) + if (tcp_can_ingest(sk, in_skb)) return 0; /* If we are really being abused, tell the caller to silently @@ -5881,7 +5900,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, step1: /* Step 1: check sequence number */ - reason = tcp_sequence(tp, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); + reason = tcp_sequence(sk, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq); if (reason) { /* RFC793, page 37: "In all states except SYN-SENT, all reset * (RST) segments are validated by checking their SEQ-fields." @@ -5892,6 +5911,7 @@ step1: if (!th->rst) { if (th->syn) goto syn_challenge; + NET_INC_STATS(sock_net(sk), LINUX_MIB_BEYOND_WINDOW); if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSEQ, &tp->last_oow_ack_time)) @@ -6110,6 +6130,10 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb) if (tcp_checksum_complete(skb)) goto csum_error; + if (after(TCP_SKB_CB(skb)->end_seq, + tp->rcv_nxt + tcp_receive_window(tp))) + goto validate; + if ((int)skb->truesize > sk->sk_forward_alloc) goto step5; @@ -6165,7 +6189,7 @@ slow_path: /* * Standard slow path. */ - +validate: if (!tcp_validate_incoming(sk, skb, th, 1)) return; diff --git a/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt new file mode 100644 index 000000000000..7e6bc5fb0c8d --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_ooo_rcv_mss.pkt @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh +sysctl -q net.ipv4.tcp_rmem="4096 131072 $((32*1024*1024))"` + + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 65535 <mss 1000,nop,nop,sackOK,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 10> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < . 2001:11001(9000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:11001> + +// check that ooo packet properly updates tcpi_rcv_mss + +0 %{ assert tcpi_rcv_mss == 1000, tcpi_rcv_mss }% + + +0 < . 11001:21001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 1 win 81 <nop,nop,sack 2001:21001> + diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt new file mode 100644 index 000000000000..7e170b94fd36 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_big_endseq.pkt @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [10000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:4001(4000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// packet in sequence : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 4001:54001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// ooo packet. : SKB_DROP_REASON_TCP_INVALID_END_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +1 < P. 5001:55001(50000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + +// SKB_DROP_REASON_TCP_INVALID_SEQUENCE / LINUX_MIB_BEYOND_WINDOW + +0 < P. 70001:80001(10000) ack 1 win 257 + +0 > . 1:1(0) ack 4001 win 5000 + + +0 read(4, ..., 100000) = 4000 + +// If queue is empty, accept a packet even if its end_seq is above wup + rcv_wnd + +0 < P. 4001:54001(50000) ack 1 win 257 + +.040 > . 1:1(0) ack 54001 win 0 + +// Check LINUX_MIB_BEYOND_WINDOW has been incremented 3 times. ++0 `nstat | grep TcpExtBeyondWindow | grep -q " 3 "` diff --git a/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt new file mode 100644 index 000000000000..f575c0ff89da --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_rcv_toobig.pkt @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 + +--mss=1000 + +`./defaults.sh` + + 0 `nstat -n` + +// Establish a connection. + +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 setsockopt(3, SOL_SOCKET, SO_RCVBUF, [20000], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 1) = 0 + + +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7> + +0 > S. 0:0(0) ack 1 win 18980 <mss 1460,nop,wscale 0> + +.1 < . 1:1(0) ack 1 win 257 + + +0 accept(3, ..., ...) = 4 + + +0 < P. 1:20001(20000) ack 1 win 257 + +.04 > . 1:1(0) ack 20001 win 18000 + + +0 setsockopt(4, SOL_SOCKET, SO_RCVBUF, [12000], 4) = 0 + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 20001 win 18000 + + +0 read(4, ..., 20000) = 20000 +// A too big packet is accepted if the receive queue is empty + +0 < P. 20001:80001(60000) ack 1 win 257 + +0 > . 1:1(0) ack 80001 win 0 + |
