summary | refs | log | tree | commit | diff
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author: Jakub Kicinski <kuba@kernel.org>, 2024-05-29 17:21:37 -0700
committer: Jakub Kicinski <kuba@kernel.org>, 2024-05-29 17:21:38 -0700
commit: 0f4b437b5fbf5141ff886bb47581123eb222c543 (patch)
tree: fc8014baaf67f7eb33fa5249a7c068a43438c7cc /net/ipv4/tcp_input.c
parent: c3390677f6258748a91bf37b9bb21eab89f63b42 (diff)
parent: fde6f897f2a184546bf5516ac736523ef24dc6a7 (diff)
Merge branch 'tcp-fix-tcp_poll-races'
Eric Dumazet says:

====================
tcp: fix tcp_poll() races

Flakes in packetdrill tests stressing epoll_wait() were root caused
to bad ordering in tcp_write_err().

Precisely, we have to call sk_error_report() after tcp_done().

When fixing this issue, we discovered tcp_abort(), tcp_v4_err()
and tcp_v6_err() had similar issues.

Since tcp_reset() has the correct ordering, first patch takes
part of it and creates tcp_done_with_error() helper.
====================

Link: https://lore.kernel.org/r/20240528125253.1966136-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c | 32
1 files changed, 21 insertions, 11 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 9c04a9c8be9d..5aadf64e554d 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4436,9 +4436,26 @@ static enum skb_drop_reason tcp_sequence(const struct tcp_sock *tp,
return SKB_NOT_DROPPED_YET;
}
+
+void tcp_done_with_error(struct sock *sk, int err)
+{
+ /* This barrier is coupled with smp_rmb() in tcp_poll() */
+ WRITE_ONCE(sk->sk_err, err);
+ smp_wmb();
+
+ tcp_write_queue_purge(sk);
+ tcp_done(sk);
+
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
+}
+EXPORT_SYMBOL(tcp_done_with_error);
+
/* When we get a reset we do this. */
void tcp_reset(struct sock *sk, struct sk_buff *skb)
{
+ int err;
+
trace_tcp_receive_reset(sk);
/* mptcp can't tell us to ignore reset pkts,
@@ -4450,24 +4467,17 @@ void tcp_reset(struct sock *sk, struct sk_buff *skb)
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
case TCP_SYN_SENT:
- WRITE_ONCE(sk->sk_err, ECONNREFUSED);
+ err = ECONNREFUSED;
break;
case TCP_CLOSE_WAIT:
- WRITE_ONCE(sk->sk_err, EPIPE);
+ err = EPIPE;
break;
case TCP_CLOSE:
return;
default:
- WRITE_ONCE(sk->sk_err, ECONNRESET);
+ err = ECONNRESET;
}
- /* This barrier is coupled with smp_rmb() in tcp_poll() */
- smp_wmb();
-
- tcp_write_queue_purge(sk);
- tcp_done(sk);
-
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+ tcp_done_with_error(sk, err);
}
/*