From 72bf4f1767f0386970dc04726dc5bc2e3991dc19 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Oct 2023 11:24:57 +0000 Subject: net: do not leave an empty skb in write queue Under memory stress conditions, tcp_sendmsg_locked() might call sk_stream_wait_memory(), thus releasing the socket lock. If a fresh skb has been allocated prior to this, we should not leave it in the write queue otherwise tcp_write_xmit() could panic. This apparently does not happen often, but a future change in __sk_mem_raise_allocated() that Shakeel and others are considering would increase chances of being hurt. Under discussion is to remove this controversial part: /* Fail only if socket is _under_ its sndbuf. * In this case we cannot block, so that we have to fail. */ if (sk->sk_wmem_queued + size >= sk->sk_sndbuf) { /* Force charge with __GFP_NOFAIL */ if (memcg_charge && !charged) { mem_cgroup_charge_skmem(sk->sk_memcg, amt, gfp_memcg_charge() | __GFP_NOFAIL); } return 1; } Fixes: fdfc5c8594c2 ("tcp: remove empty skb from write queue in error cases") Signed-off-by: Eric Dumazet Reviewed-by: Shakeel Butt Link: https://lore.kernel.org/r/20231019112457.1190114-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- net/ipv4/tcp.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/ipv4/tcp.c') diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index d3456cf840de..3d3a24f79573 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -927,10 +927,11 @@ int tcp_send_mss(struct sock *sk, int *size_goal, int flags) return mss_now; } -/* In some cases, both sendmsg() could have added an skb to the write queue, - * but failed adding payload on it. We need to remove it to consume less +/* In some cases, sendmsg() could have added an skb to the write queue, + * but failed adding payload on it. We need to remove it to consume less * memory, but more importantly be able to generate EPOLLOUT for Edge Trigger - * epoll() users. + * epoll() users. Another reason is that tcp_write_xmit() does not like + * finding an empty skb in the write queue. */ void tcp_remove_empty_skb(struct sock *sk) { @@ -1289,6 +1290,7 @@ new_segment: wait_for_space: set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + tcp_remove_empty_skb(sk); if (copied) tcp_push(sk, flags & ~MSG_MORE, mss_now, TCP_NAGLE_PUSH, size_goal); -- cgit