diff options
author | David S. Miller <davem@davemloft.net> | 2024-08-07 10:24:46 +0100 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2024-08-07 10:24:46 +0100 |
commit | e66f33bdf0c3adda068b1e2c70c768c56166b58a (patch) | |
tree | eaa0e111b4df399b2e13eb06e8f0451546e47e40 | |
parent | 2c14119ab8f356cb429a5f4855b7880f33cfb981 (diff) | |
parent | ba0ca286c919508ac32d036509b082b3968c0bb2 (diff) |
Merge branch 'tcp-active-reset'
Jason Xing says:
===================
tcp: completely support active reset
This time the patch series finally covers all the cases in the active
reset logic. After this, we can know the related exact reason(s).
v4
Link:
1. revise the changelog to avoid future confusion in patch [5/7] (Eric)
2. revise the changelog of patch [6/7] like above.
3. add reviewed-by tags (Eric)
v3
Link: https://lore.kernel.org/all/20240731120955.23542-1-kerneljasonxing@gmail.com/
1. introduce TCP_DISCONNECT_WITH_DATA reason (Eric)
2. use a better name 'TCP_KEEPALIVE_TIMEOUT' (Eric)
3. add three reviewed-by tags (Eric)
v2
Link: https://lore.kernel.org/all/20240730133513.99986-1-kerneljasonxing@gmail.com/
1. use RFC 9293 in the comment and changelog instead of old RFC 793
2. correct the comment and changelog in patch 5
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/rstreason.h | 39 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 19 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 2 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 6 |
4 files changed, 54 insertions, 12 deletions
diff --git a/include/net/rstreason.h b/include/net/rstreason.h index 2575c85d7f7a..69cb2e52b7da 100644 --- a/include/net/rstreason.h +++ b/include/net/rstreason.h @@ -17,6 +17,12 @@ FN(TCP_ABORT_ON_DATA) \ FN(TCP_TIMEWAIT_SOCKET) \ FN(INVALID_SYN) \ + FN(TCP_ABORT_ON_CLOSE) \ + FN(TCP_ABORT_ON_LINGER) \ + FN(TCP_ABORT_ON_MEMORY) \ + FN(TCP_STATE) \ + FN(TCP_KEEPALIVE_TIMEOUT) \ + FN(TCP_DISCONNECT_WITH_DATA) \ FN(MPTCP_RST_EUNSPEC) \ FN(MPTCP_RST_EMPTCP) \ FN(MPTCP_RST_ERESOURCE) \ @@ -84,6 +90,39 @@ enum sk_rst_reason { * an error, send a reset" */ SK_RST_REASON_INVALID_SYN, + /** + * @SK_RST_REASON_TCP_ABORT_ON_CLOSE: abort on close + * corresponding to LINUX_MIB_TCPABORTONCLOSE + */ + SK_RST_REASON_TCP_ABORT_ON_CLOSE, + /** + * @SK_RST_REASON_TCP_ABORT_ON_LINGER: abort on linger + * corresponding to LINUX_MIB_TCPABORTONLINGER + */ + SK_RST_REASON_TCP_ABORT_ON_LINGER, + /** + * @SK_RST_REASON_TCP_ABORT_ON_MEMORY: abort on memory + * corresponding to LINUX_MIB_TCPABORTONMEMORY + */ + SK_RST_REASON_TCP_ABORT_ON_MEMORY, + /** + * @SK_RST_REASON_TCP_STATE: abort on tcp state + * Please see RFC 9293 for all possible reset conditions + */ + SK_RST_REASON_TCP_STATE, + /** + * @SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT: time to timeout + * When we have already run out of all the chances, which means + * keepalive timeout, we have to reset the connection + */ + SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT, + /** + * @SK_RST_REASON_TCP_DISCONNECT_WITH_DATA: disconnect when write + * queue is not empty + * It means user has written data into the write queue when doing + * disconnecting, so we have to send an RST. + */ + SK_RST_REASON_TCP_DISCONNECT_WITH_DATA, /* Copy from include/uapi/linux/mptcp.h. * These reset fields will not be changed since they adhere to diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e03a342c9162..8514257f4ecd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2833,7 +2833,7 @@ void __tcp_close(struct sock *sk, long timeout) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONCLOSE); tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, sk->sk_allocation, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_CLOSE); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); @@ -2908,7 +2908,7 @@ adjudge_to_death: if (READ_ONCE(tp->linger2) < 0) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_LINGER); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONLINGER); } else { @@ -2927,7 +2927,7 @@ adjudge_to_death: if (tcp_check_oom(sk, 0)) { tcp_set_state(sk, TCP_CLOSE); tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_MEMORY); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); } else if (!check_net(sock_net(sk))) { @@ -3025,13 +3025,16 @@ int tcp_disconnect(struct sock *sk, int flags) inet_csk_listen_stop(sk); } else if (unlikely(tp->repair)) { WRITE_ONCE(sk->sk_err, ECONNABORTED); - } else if (tcp_need_reset(old_state) || - (tp->snd_nxt != tp->write_seq && - (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK))) { + } else if (tcp_need_reset(old_state)) { + tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_TCP_STATE); + WRITE_ONCE(sk->sk_err, ECONNRESET); + } else if (tp->snd_nxt != tp->write_seq && + (1 << old_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) { /* The last check adjusts for discrepancy of Linux wrt. RFC * states */ - tcp_send_active_reset(sk, gfp_any(), SK_RST_REASON_NOT_SPECIFIED); + tcp_send_active_reset(sk, gfp_any(), + SK_RST_REASON_TCP_DISCONNECT_WITH_DATA); WRITE_ONCE(sk->sk_err, ECONNRESET); } else if (old_state == TCP_SYN_SENT) WRITE_ONCE(sk->sk_err, ECONNRESET); @@ -4649,7 +4652,7 @@ int tcp_abort(struct sock *sk, int err) if (!sock_flag(sk, SOCK_DEAD)) { if (tcp_need_reset(sk->sk_state)) tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_STATE); tcp_done_with_error(sk, err); } diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 16c48df8df4c..cdd0def14427 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3649,7 +3649,7 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority, /* skb of trace_tcp_send_reset() keeps the skb that caused RST, * skb here is different to the troublesome skb, so use NULL */ - trace_tcp_send_reset(sk, NULL, SK_RST_REASON_NOT_SPECIFIED); + trace_tcp_send_reset(sk, NULL, reason); } /* Send a crossed SYN-ACK during socket establishment. diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 4d40615dc8fc..86169127e4d1 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -125,7 +125,7 @@ static int tcp_out_of_resources(struct sock *sk, bool do_reset) do_reset = true; if (do_reset) tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_ABORT_ON_MEMORY); tcp_done(sk); __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTONMEMORY); return 1; @@ -779,7 +779,7 @@ static void tcp_keepalive_timer (struct timer_list *t) goto out; } } - tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_NOT_SPECIFIED); + tcp_send_active_reset(sk, GFP_ATOMIC, SK_RST_REASON_TCP_STATE); goto death; } @@ -807,7 +807,7 @@ static void tcp_keepalive_timer (struct timer_list *t) (user_timeout == 0 && icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC, - SK_RST_REASON_NOT_SPECIFIED); + SK_RST_REASON_TCP_KEEPALIVE_TIMEOUT); tcp_write_err(sk); goto out; } |