diff options
author | David S. Miller <davem@davemloft.net> | 2021-03-11 18:35:31 -0800 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2021-03-11 18:35:31 -0800 |
commit | 5215206d8b1574c4ba8915a61fe841c376d51ed2 (patch) | |
tree | 7c530d5490a1fc493fd3df9483501b01bd040d3a | |
parent | 8176f8c0f095c9df44994a33f19b55e7c847df7f (diff) | |
parent | ac3959fd0dcc0e49298ea5cadfe257db0e58ef8b (diff) |
Merge branch 'tcp-delayed-completions'
Eric Dumazet says:
====================
tcp: better deal with delayed TX completions
Jakub and Neil reported an increase of RTO timers whenever
TX completions are delayed a bit more (by increasing
NIC TX coalescing parameters)
While problems have been there forever, second patch might
introduce some regressions so I prefer not backport
them to stable releases before things settle.
Many thanks to FB team for their help and tests.
Few packetdrill tests need to be changed to reflect
the improvements brought by this series.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/skbuff.h | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 10 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 20 |
3 files changed, 13 insertions, 19 deletions
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0503c917d773..483e89348f78 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1140,7 +1140,7 @@ static inline bool skb_fclone_busy(const struct sock *sk, return skb->fclone == SKB_FCLONE_ORIG && refcount_read(&fclones->fclone_ref) > 1 && - fclones->skb2.sk == sk; + READ_ONCE(fclones->skb2.sk) == sk; } /** diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 69a545db80d2..4cf4dd532d1c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2914,7 +2914,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, /* D. Check state exit conditions. State can be terminated * when high_seq is ACKed. */ if (icsk->icsk_ca_state == TCP_CA_Open) { - WARN_ON(tp->retrans_out != 0); + WARN_ON(tp->retrans_out != 0 && !tp->syn_data); tp->retrans_stamp = 0; } else if (!before(tp->snd_una, tp->high_seq)) { switch (icsk->icsk_ca_state) { @@ -5994,11 +5994,9 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tp->fastopen_client_fail = TFO_SYN_RETRANSMITTED; else tp->fastopen_client_fail = TFO_DATA_NOT_ACKED; - skb_rbtree_walk_from(data) { - if (__tcp_retransmit_skb(sk, data, 1)) - break; - } - tcp_rearm_rto(sk); + skb_rbtree_walk_from(data) + tcp_mark_skb_lost(sk, data); + tcp_xmit_retransmit_queue(sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENACTIVEFAIL); return true; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index fbf140a770d8..bde781f46b41 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2775,13 +2775,17 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto) * a packet is still in a qdisc or driver queue. * In this case, there is very little point doing a retransmit ! */ -static bool skb_still_in_host_queue(const struct sock *sk, +static bool skb_still_in_host_queue(struct sock *sk, const struct sk_buff *skb) { if (unlikely(skb_fclone_busy(sk, skb))) { - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); - return true; + set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); + smp_mb__after_atomic(); + if (skb_fclone_busy(sk, skb)) { + NET_INC_STATS(sock_net(sk), + LINUX_MIB_TCPSPURIOUS_RTX_HOSTQUEUES); + return true; + } } return false; } @@ -3147,14 +3151,6 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) if (icsk->icsk_mtup.probe_size) icsk->icsk_mtup.probe_size = 0; - /* Do not sent more than we queued. 1/4 is reserved for possible - * copying overhead: fragmentation, tunneling, mangling etc. - */ - if (refcount_read(&sk->sk_wmem_alloc) > - min_t(u32, sk->sk_wmem_queued + (sk->sk_wmem_queued >> 2), - sk->sk_sndbuf)) - return -EAGAIN; - if (skb_still_in_host_queue(sk, skb)) return -EBUSY; |