diff options
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- | net/mptcp/protocol.c | 104 |
1 files changed, 80 insertions, 24 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index edf14c2c2062..9a287b75c1b3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> +#include <net/aligned_data.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -67,6 +68,26 @@ static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) return &inet_stream_ops; } +bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib) +{ + struct net *net = sock_net((struct sock *)msk); + + if (__mptcp_check_fallback(msk)) + return true; + + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + + msk->allow_subflows = false; + set_bit(MPTCP_FALLBACK_DONE, &msk->flags); + __MPTCP_INC_STATS(net, fb_mib); + spin_unlock_bh(&msk->fallback_lock); + return true; +} + static int __mptcp_socket_create(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -560,11 +581,7 @@ static bool mptcp_check_data_fin(struct sock *sk) static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) { - if (READ_ONCE(msk->allow_infinite_fallback)) { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_DSSCORRUPTIONFALLBACK); - mptcp_do_fallback(ssk); - } else { + if (!mptcp_try_fallback(ssk, MPTCP_MIB_DSSCORRUPTIONFALLBACK)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); mptcp_subflow_reset(ssk); } @@ -792,7 +809,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) { mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); } @@ -803,6 +820,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_state != TCP_ESTABLISHED) return false; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); + /* attach to msk socket only after we are sure we will deal with it * at close time */ @@ -811,7 +836,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); - mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); __mptcp_propagate_sndbuf(sk, ssk); return true; @@ -1136,10 +1160,13 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mpext->infinite_map = 1; mpext->data_len = 0; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); + if (!mptcp_try_fallback(ssk, MPTCP_MIB_INFINITEMAPTX)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED); + mptcp_subflow_reset(ssk); + return; + } + mptcp_subflow_ctx(ssk)->send_infinite_map = 0; - pr_fallback(msk); - mptcp_do_fallback(ssk); } #define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) @@ -1376,7 +1403,7 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) * - estimate the faster flow linger time * - use the above to estimate the amount of byte transferred * by the faster flow - * - check that the amount of queued data is greter than the above, + * - check that the amount of queued data is greater than the above, * otherwise do not use the picked, slower, subflow * We select the subflow with the shorter estimated time to flush * the queued mem, which basically ensure the above. We just need @@ -2543,9 +2570,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) static void __mptcp_retrans(struct sock *sk) { + struct mptcp_sendmsg_info info = { .data_lock_held = true, }; struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; - struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; struct sock *ssk; int ret, err; @@ -2590,6 +2617,18 @@ static void __mptcp_retrans(struct sock *sk) info.sent = 0; info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + + /* + * make the whole retrans decision, xmit, disallow + * fallback atomic + */ + spin_lock_bh(&msk->fallback_lock); + if (__mptcp_check_fallback(msk)) { + spin_unlock_bh(&msk->fallback_lock); + release_sock(ssk); + return; + } + while (info.sent < info.limit) { ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) @@ -2603,8 +2642,9 @@ static void __mptcp_retrans(struct sock *sk) len = max(copied, len); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; } + spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); } @@ -2730,7 +2770,8 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); - WRITE_ONCE(msk->allow_infinite_fallback, true); + msk->allow_infinite_fallback = true; + msk->allow_subflows = true; msk->recovery = false; msk->subflow_id = 1; msk->last_data_sent = tcp_jiffies32; @@ -2738,6 +2779,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); + spin_lock_init(&msk->fallback_lock); /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); @@ -3117,7 +3159,16 @@ static int mptcp_disconnect(struct sock *sk, int flags) * subflow */ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); + + /* The first subflow is already in TCP_CLOSE status, the following + * can't overlap with a fallback anymore + */ + spin_lock_bh(&msk->fallback_lock); + msk->allow_subflows = true; + msk->allow_infinite_fallback = true; WRITE_ONCE(msk->flags, 0); + spin_unlock_bh(&msk->fallback_lock); + msk->cb_flags = 0; msk->recovery = false; WRITE_ONCE(msk->can_ack, false); @@ -3503,7 +3554,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); sk_set_socket(sk, parent); - sk->sk_uid = SOCK_INODE(parent)->i_uid; + WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); write_unlock_bh(&sk->sk_callback_lock); } @@ -3524,7 +3575,13 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); mptcp_propagate_sndbuf(parent, ssk); return true; } @@ -3648,16 +3705,15 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * TCP option space. */ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) - mptcp_subflow_early_fallback(msk, subflow); + mptcp_early_fallback(msk, subflow, MPTCP_MIB_MD5SIGFALLBACK); #endif if (subflow->request_mptcp) { - if (mptcp_active_should_disable(sk)) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED); - mptcp_subflow_early_fallback(msk, subflow); - } else if (mptcp_token_new_connect(ssk) < 0) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); - mptcp_subflow_early_fallback(msk, subflow); - } + if (mptcp_active_should_disable(sk)) + mptcp_early_fallback(msk, subflow, + MPTCP_MIB_MPCAPABLEACTIVEDISABLED); + else if (mptcp_token_new_connect(ssk) < 0) + mptcp_early_fallback(msk, subflow, + MPTCP_MIB_TOKENFALLBACKINIT); } WRITE_ONCE(msk->write_seq, subflow->idsn); @@ -3729,7 +3785,7 @@ static struct proto mptcp_prot = { .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, - .memory_allocated = &tcp_memory_allocated, + .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, |