diff options
Diffstat (limited to 'net/mptcp')
-rw-r--r-- | net/mptcp/ctrl.c | 4 | ||||
-rw-r--r-- | net/mptcp/mib.c | 6 | ||||
-rw-r--r-- | net/mptcp/mib.h | 8 | ||||
-rw-r--r-- | net/mptcp/options.c | 6 | ||||
-rw-r--r-- | net/mptcp/pm.c | 16 | ||||
-rw-r--r-- | net/mptcp/protocol.c | 122 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 46 | ||||
-rw-r--r-- | net/mptcp/sched.c | 35 | ||||
-rw-r--r-- | net/mptcp/sockopt.c | 33 | ||||
-rw-r--r-- | net/mptcp/subflow.c | 52 |
10 files changed, 229 insertions, 99 deletions
diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index d9290c5bb6c7..fed40dae5583 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -533,9 +533,9 @@ void mptcp_active_detect_blackhole(struct sock *ssk, bool expired) to_max = mptcp_get_pernet(net)->syn_retrans_before_tcp_fallback; if (timeouts == to_max || (timeouts < to_max && expired)) { - MPTCP_INC_STATS(net, MPTCP_MIB_MPCAPABLEACTIVEDROP); subflow->mpc_drop = 1; - mptcp_subflow_early_fallback(mptcp_sk(subflow->conn), subflow); + mptcp_early_fallback(mptcp_sk(subflow->conn), subflow, + MPTCP_MIB_MPCAPABLEACTIVEDROP); } } diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 19eb9292bd60..cf879c188ca2 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -28,6 +28,7 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("MPJoinSynAckHMacFailure", MPTCP_MIB_JOINSYNACKMAC), SNMP_MIB_ITEM("MPJoinAckRx", MPTCP_MIB_JOINACKRX), SNMP_MIB_ITEM("MPJoinAckHMacFailure", MPTCP_MIB_JOINACKMAC), + SNMP_MIB_ITEM("MPJoinRejected", MPTCP_MIB_JOINREJECTED), SNMP_MIB_ITEM("MPJoinSynTx", MPTCP_MIB_JOINSYNTX), SNMP_MIB_ITEM("MPJoinSynTxCreatSkErr", MPTCP_MIB_JOINSYNTXCREATSKERR), SNMP_MIB_ITEM("MPJoinSynTxBindErr", MPTCP_MIB_JOINSYNTXBINDERR), @@ -79,6 +80,11 @@ static const struct snmp_mib mptcp_snmp_list[] = { SNMP_MIB_ITEM("RcvWndConflict", MPTCP_MIB_RCVWNDCONFLICT), SNMP_MIB_ITEM("MPCurrEstab", MPTCP_MIB_CURRESTAB), SNMP_MIB_ITEM("Blackhole", MPTCP_MIB_BLACKHOLE), + SNMP_MIB_ITEM("MPCapableDataFallback", MPTCP_MIB_MPCAPABLEDATAFALLBACK), + SNMP_MIB_ITEM("MD5SigFallback", MPTCP_MIB_MD5SIGFALLBACK), + SNMP_MIB_ITEM("DssFallback", MPTCP_MIB_DSSFALLBACK), + SNMP_MIB_ITEM("SimultConnectFallback", MPTCP_MIB_SIMULTCONNFALLBACK), + SNMP_MIB_ITEM("FallbackFailed", MPTCP_MIB_FALLBACKFAILED), SNMP_MIB_SENTINEL }; diff --git a/net/mptcp/mib.h b/net/mptcp/mib.h index 128282982843..309bac6fea32 100644 --- a/net/mptcp/mib.h +++ b/net/mptcp/mib.h @@ -23,6 +23,7 @@ enum linux_mptcp_mib_field { MPTCP_MIB_JOINSYNACKMAC, /* HMAC was wrong on SYN/ACK + MP_JOIN */ MPTCP_MIB_JOINACKRX, /* Received an ACK + MP_JOIN */ MPTCP_MIB_JOINACKMAC, /* HMAC was wrong on ACK + MP_JOIN */ + MPTCP_MIB_JOINREJECTED, /* The PM rejected the JOIN request */ MPTCP_MIB_JOINSYNTX, /* Sending a SYN + MP_JOIN */ MPTCP_MIB_JOINSYNTXCREATSKERR, /* Not able to create a socket when sending a SYN + MP_JOIN */ MPTCP_MIB_JOINSYNTXBINDERR, /* Not able to bind() the address when sending a SYN + MP_JOIN */ @@ -80,6 +81,13 @@ enum linux_mptcp_mib_field { MPTCP_MIB_RCVWNDCONFLICT, /* Conflict with while updating msk rcv wnd */ MPTCP_MIB_CURRESTAB, /* Current established MPTCP connections */ MPTCP_MIB_BLACKHOLE, /* A blackhole has been detected */ + MPTCP_MIB_MPCAPABLEDATAFALLBACK, /* Missing DSS/MPC+data on first + * established packet + */ + MPTCP_MIB_MD5SIGFALLBACK, /* Conflicting TCP option enabled */ + MPTCP_MIB_DSSFALLBACK, /* Bad or missing DSS */ + MPTCP_MIB_SIMULTCONNFALLBACK, /* Simultaneous connect */ + MPTCP_MIB_FALLBACKFAILED, /* Can't fallback due to msk status */ __MPTCP_MIB_MAX }; diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 421ced031289..70c0ab0ecf90 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -978,8 +978,10 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->mp_join) goto reset; subflow->mp_capable = 0; - pr_fallback(msk); - mptcp_do_fallback(ssk); + if (!mptcp_try_fallback(ssk, MPTCP_MIB_MPCAPABLEDATAFALLBACK)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED); + goto reset; + } return false; } diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 31747f974941..420d416e2603 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -151,10 +151,13 @@ bool mptcp_remove_anno_list_by_saddr(struct mptcp_sock *msk, const struct mptcp_addr_info *addr) { struct mptcp_pm_add_entry *entry; + bool ret; entry = mptcp_pm_del_add_timer(msk, addr, false); + ret = entry; kfree(entry); - return entry; + + return ret; } bool mptcp_pm_sport_in_anno_list(struct mptcp_sock *msk, const struct sock *sk) @@ -267,7 +270,8 @@ int mptcp_pm_mp_prio_send_ack(struct mptcp_sock *msk, static void mptcp_pm_add_timer(struct timer_list *timer) { - struct mptcp_pm_add_entry *entry = from_timer(entry, timer, add_timer); + struct mptcp_pm_add_entry *entry = timer_container_of(entry, timer, + add_timer); struct mptcp_sock *msk = entry->sock; struct sock *sk = (struct sock *)msk; @@ -761,8 +765,14 @@ void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq) pr_debug("fail_seq=%llu\n", fail_seq); - if (!READ_ONCE(msk->allow_infinite_fallback)) + /* After accepting the fail, we can't create any other subflows */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); return; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); if (!subflow->fail_tout) { pr_debug("send MP_FAIL response and infinite map\n"); diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 44f7ab463d75..9a287b75c1b3 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -11,6 +11,7 @@ #include <linux/netdevice.h> #include <linux/sched/signal.h> #include <linux/atomic.h> +#include <net/aligned_data.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -46,7 +47,9 @@ static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_sm static void __mptcp_destroy_sock(struct sock *sk); static void mptcp_check_send_data_fin(struct sock *sk); -DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions); +DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions) = { + .bh_lock = INIT_LOCAL_LOCK(bh_lock), +}; static struct net_device *mptcp_napi_dev; /* Returns end sequence number of the receiver's advertised window */ @@ -65,6 +68,26 @@ static const struct proto_ops *mptcp_fallback_tcp_ops(const struct sock *sk) return &inet_stream_ops; } +bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib) +{ + struct net *net = sock_net((struct sock *)msk); + + if (__mptcp_check_fallback(msk)) + return true; + + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + + msk->allow_subflows = false; + set_bit(MPTCP_FALLBACK_DONE, &msk->flags); + __MPTCP_INC_STATS(net, fb_mib); + spin_unlock_bh(&msk->fallback_lock); + return true; +} + static int __mptcp_socket_create(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; @@ -558,11 +581,7 @@ static bool mptcp_check_data_fin(struct sock *sk) static void mptcp_dss_corruption(struct mptcp_sock *msk, struct sock *ssk) { - if (READ_ONCE(msk->allow_infinite_fallback)) { - MPTCP_INC_STATS(sock_net(ssk), - MPTCP_MIB_DSSCORRUPTIONFALLBACK); - mptcp_do_fallback(ssk); - } else { + if (!mptcp_try_fallback(ssk, MPTCP_MIB_DSSCORRUPTIONFALLBACK)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DSSCORRUPTIONRESET); mptcp_subflow_reset(ssk); } @@ -790,7 +809,7 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk) static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk) { mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC); } @@ -801,6 +820,14 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) if (sk->sk_state != TCP_ESTABLISHED) return false; + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); + /* attach to msk socket only after we are sure we will deal with it * at close time */ @@ -809,7 +836,6 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk) mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++; mptcp_sockopt_sync_locked(msk, ssk); - mptcp_subflow_joined(msk, ssk); mptcp_stop_tout_timer(sk); __mptcp_propagate_sndbuf(sk, ssk); return true; @@ -1134,10 +1160,13 @@ static void mptcp_update_infinite_map(struct mptcp_sock *msk, mpext->infinite_map = 1; mpext->data_len = 0; - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX); + if (!mptcp_try_fallback(ssk, MPTCP_MIB_INFINITEMAPTX)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED); + mptcp_subflow_reset(ssk); + return; + } + mptcp_subflow_ctx(ssk)->send_infinite_map = 0; - pr_fallback(msk); - mptcp_do_fallback(ssk); } #define MPTCP_MAX_GSO_SIZE (GSO_LEGACY_MAX_SIZE - (MAX_TCP_HEADER + 1)) @@ -1374,7 +1403,7 @@ struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk) * - estimate the faster flow linger time * - use the above to estimate the amount of byte transferred * by the faster flow - * - check that the amount of queued data is greter than the above, + * - check that the amount of queued data is greater than the above, * otherwise do not use the picked, slower, subflow * We select the subflow with the shorter estimated time to flush * the queued mem, which basically ensure the above. We just need @@ -2201,8 +2230,8 @@ out_err: static void mptcp_retransmit_timer(struct timer_list *t) { - struct inet_connection_sock *icsk = from_timer(icsk, t, - icsk_retransmit_timer); + struct inet_connection_sock *icsk = timer_container_of(icsk, t, + icsk_retransmit_timer); struct sock *sk = &icsk->icsk_inet.sk; struct mptcp_sock *msk = mptcp_sk(sk); @@ -2221,7 +2250,7 @@ static void mptcp_retransmit_timer(struct timer_list *t) static void mptcp_tout_timer(struct timer_list *t) { - struct sock *sk = from_timer(sk, t, sk_timer); + struct sock *sk = timer_container_of(sk, t, sk_timer); mptcp_schedule_work(sk); sock_put(sk); @@ -2541,9 +2570,9 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) static void __mptcp_retrans(struct sock *sk) { + struct mptcp_sendmsg_info info = { .data_lock_held = true, }; struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; - struct mptcp_sendmsg_info info = {}; struct mptcp_data_frag *dfrag; struct sock *ssk; int ret, err; @@ -2588,6 +2617,18 @@ static void __mptcp_retrans(struct sock *sk) info.sent = 0; info.limit = READ_ONCE(msk->csum_enabled) ? dfrag->data_len : dfrag->already_sent; + + /* + * make the whole retrans decision, xmit, disallow + * fallback atomic + */ + spin_lock_bh(&msk->fallback_lock); + if (__mptcp_check_fallback(msk)) { + spin_unlock_bh(&msk->fallback_lock); + release_sock(ssk); + return; + } + while (info.sent < info.limit) { ret = mptcp_sendmsg_frag(sk, ssk, dfrag, &info); if (ret <= 0) @@ -2601,8 +2642,9 @@ static void __mptcp_retrans(struct sock *sk) len = max(copied, len); tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle, info.size_goal); - WRITE_ONCE(msk->allow_infinite_fallback, false); + msk->allow_infinite_fallback = false; } + spin_unlock_bh(&msk->fallback_lock); release_sock(ssk); } @@ -2728,7 +2770,8 @@ static void __mptcp_init_sock(struct sock *sk) WRITE_ONCE(msk->first, NULL); inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss; WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk))); - WRITE_ONCE(msk->allow_infinite_fallback, true); + msk->allow_infinite_fallback = true; + msk->allow_subflows = true; msk->recovery = false; msk->subflow_id = 1; msk->last_data_sent = tcp_jiffies32; @@ -2736,6 +2779,7 @@ static void __mptcp_init_sock(struct sock *sk) msk->last_ack_recv = tcp_jiffies32; mptcp_pm_data_init(msk); + spin_lock_init(&msk->fallback_lock); /* re-use the csk retrans timer for MPTCP-level retrans */ timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0); @@ -3115,7 +3159,16 @@ static int mptcp_disconnect(struct sock *sk, int flags) * subflow */ mptcp_destroy_common(msk, MPTCP_CF_FASTCLOSE); + + /* The first subflow is already in TCP_CLOSE status, the following + * can't overlap with a fallback anymore + */ + spin_lock_bh(&msk->fallback_lock); + msk->allow_subflows = true; + msk->allow_infinite_fallback = true; WRITE_ONCE(msk->flags, 0); + spin_unlock_bh(&msk->fallback_lock); + msk->cb_flags = 0; msk->recovery = false; WRITE_ONCE(msk->can_ack, false); @@ -3142,9 +3195,9 @@ static int mptcp_disconnect(struct sock *sk, int flags) #if IS_ENABLED(CONFIG_MPTCP_IPV6) static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) { - unsigned int offset = sizeof(struct mptcp6_sock) - sizeof(struct ipv6_pinfo); + struct mptcp6_sock *msk6 = container_of(mptcp_sk(sk), struct mptcp6_sock, msk); - return (struct ipv6_pinfo *)(((u8 *)sk) + offset); + return &msk6->np; } static void mptcp_copy_ip6_options(struct sock *newsk, const struct sock *sk) @@ -3501,7 +3554,7 @@ void mptcp_sock_graft(struct sock *sk, struct socket *parent) write_lock_bh(&sk->sk_callback_lock); rcu_assign_pointer(sk->sk_wq, &parent->wq); sk_set_socket(sk, parent); - sk->sk_uid = SOCK_INODE(parent)->i_uid; + WRITE_ONCE(sk->sk_uid, SOCK_INODE(parent)->i_uid); write_unlock_bh(&sk->sk_callback_lock); } @@ -3522,13 +3575,21 @@ bool mptcp_finish_join(struct sock *ssk) /* active subflow, already present inside the conn_list */ if (!list_empty(&subflow->node)) { + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_subflows) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } mptcp_subflow_joined(msk, ssk); + spin_unlock_bh(&msk->fallback_lock); mptcp_propagate_sndbuf(parent, ssk); return true; } - if (!mptcp_pm_allow_new_subflow(msk)) + if (!mptcp_pm_allow_new_subflow(msk)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_JOINREJECTED); goto err_prohibited; + } /* If we can't acquire msk socket lock here, let the release callback * handle it @@ -3644,16 +3705,15 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) * TCP option space. */ if (rcu_access_pointer(tcp_sk(ssk)->md5sig_info)) - mptcp_subflow_early_fallback(msk, subflow); + mptcp_early_fallback(msk, subflow, MPTCP_MIB_MD5SIGFALLBACK); #endif if (subflow->request_mptcp) { - if (mptcp_active_should_disable(sk)) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPCAPABLEACTIVEDISABLED); - mptcp_subflow_early_fallback(msk, subflow); - } else if (mptcp_token_new_connect(ssk) < 0) { - MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_TOKENFALLBACKINIT); - mptcp_subflow_early_fallback(msk, subflow); - } + if (mptcp_active_should_disable(sk)) + mptcp_early_fallback(msk, subflow, + MPTCP_MIB_MPCAPABLEACTIVEDISABLED); + else if (mptcp_token_new_connect(ssk) < 0) + mptcp_early_fallback(msk, subflow, + MPTCP_MIB_TOKENFALLBACKINIT); } WRITE_ONCE(msk->write_seq, subflow->idsn); @@ -3725,7 +3785,7 @@ static struct proto mptcp_prot = { .stream_memory_free = mptcp_stream_memory_free, .sockets_allocated = &mptcp_sockets_allocated, - .memory_allocated = &tcp_memory_allocated, + .memory_allocated = &net_aligned_data.tcp_memory_allocated, .per_cpu_fw_alloc = &tcp_memory_per_cpu_fw_alloc, .memory_pressure = &tcp_memory_pressure, diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index d409586b5977..b15d7fab5c4b 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -326,6 +326,7 @@ struct mptcp_sock { int keepalive_cnt; int keepalive_idle; int keepalive_intvl; + int maxseg; struct work_struct work; struct sk_buff *ooo_last_skb; struct rb_root out_of_order_queue; @@ -346,10 +347,16 @@ struct mptcp_sock { u64 rtt_us; /* last maximum rtt of subflows */ } rcvq_space; u8 scaling_ratio; + bool allow_subflows; u32 subflow_id; u32 setsockopt_seq; char ca_name[TCP_CA_NAME_MAX]; + + spinlock_t fallback_lock; /* protects fallback, + * allow_infinite_fallback and + * allow_join + */ }; #define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock) @@ -479,6 +486,7 @@ mptcp_subflow_rsk(const struct request_sock *rsk) struct mptcp_delegated_action { struct napi_struct napi; + local_lock_t bh_lock; struct list_head head; }; @@ -670,9 +678,11 @@ static inline void mptcp_subflow_delegate(struct mptcp_subflow_context *subflow, if (WARN_ON_ONCE(!list_empty(&subflow->delegated_node))) return; + local_lock_nested_bh(&mptcp_delegated_actions.bh_lock); delegated = this_cpu_ptr(&mptcp_delegated_actions); schedule = list_empty(&delegated->head); list_add_tail(&subflow->delegated_node, &delegated->head); + local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); sock_hold(mptcp_subflow_tcp_sock(subflow)); if (schedule) napi_schedule(&delegated->napi); @@ -684,11 +694,15 @@ mptcp_subflow_delegated_next(struct mptcp_delegated_action *delegated) { struct mptcp_subflow_context *ret; - if (list_empty(&delegated->head)) + local_lock_nested_bh(&mptcp_delegated_actions.bh_lock); + if (list_empty(&delegated->head)) { + local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); return NULL; + } ret = list_first_entry(&delegated->head, struct mptcp_subflow_context, delegated_node); list_del_init(&ret->delegated_node); + local_unlock_nested_bh(&mptcp_delegated_actions.bh_lock); return ret; } @@ -744,6 +758,7 @@ void mptcp_info2sockaddr(const struct mptcp_addr_info *info, struct sockaddr_storage *addr, unsigned short family); struct mptcp_sched_ops *mptcp_sched_find(const char *name); +int mptcp_validate_scheduler(struct mptcp_sched_ops *sched); int mptcp_register_scheduler(struct mptcp_sched_ops *sched); void mptcp_unregister_scheduler(struct mptcp_sched_ops *sched); void mptcp_sched_init(void); @@ -1208,17 +1223,6 @@ static inline bool mptcp_check_fallback(const struct sock *sk) return __mptcp_check_fallback(msk); } -static inline void __mptcp_do_fallback(struct mptcp_sock *msk) -{ - if (__mptcp_check_fallback(msk)) { - pr_debug("TCP fallback already done (msk=%p)\n", msk); - return; - } - if (WARN_ON_ONCE(!READ_ONCE(msk->allow_infinite_fallback))) - return; - set_bit(MPTCP_FALLBACK_DONE, &msk->flags); -} - static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) { struct sock *ssk = READ_ONCE(msk->first); @@ -1228,14 +1232,17 @@ static inline bool __mptcp_has_initial_subflow(const struct mptcp_sock *msk) TCPF_SYN_RECV | TCPF_LISTEN)); } -static inline void mptcp_do_fallback(struct sock *ssk) +bool __mptcp_try_fallback(struct mptcp_sock *msk, int fb_mib); + +static inline bool mptcp_try_fallback(struct sock *ssk, int fb_mib) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); struct sock *sk = subflow->conn; struct mptcp_sock *msk; msk = mptcp_sk(sk); - __mptcp_do_fallback(msk); + if (!__mptcp_try_fallback(msk, fb_mib)) + return false; if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) { gfp_t saved_allocation = ssk->sk_allocation; @@ -1247,16 +1254,15 @@ static inline void mptcp_do_fallback(struct sock *ssk) tcp_shutdown(ssk, SEND_SHUTDOWN); ssk->sk_allocation = saved_allocation; } + return true; } -#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)\n", __func__, a) - -static inline void mptcp_subflow_early_fallback(struct mptcp_sock *msk, - struct mptcp_subflow_context *subflow) +static inline void mptcp_early_fallback(struct mptcp_sock *msk, + struct mptcp_subflow_context *subflow, + int fb_mib) { - pr_fallback(msk); subflow->request_mptcp = 0; - __mptcp_do_fallback(msk); + WARN_ON_ONCE(!__mptcp_try_fallback(msk, fb_mib)); } static inline bool mptcp_check_infinite_map(struct sk_buff *skb) diff --git a/net/mptcp/sched.c b/net/mptcp/sched.c index c16c6fbd4ba2..1e59072d478c 100644 --- a/net/mptcp/sched.c +++ b/net/mptcp/sched.c @@ -16,8 +16,7 @@ static DEFINE_SPINLOCK(mptcp_sched_list_lock); static LIST_HEAD(mptcp_sched_list); -static int mptcp_sched_default_get_send(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int mptcp_sched_default_get_send(struct mptcp_sock *msk) { struct sock *ssk; @@ -29,8 +28,7 @@ static int mptcp_sched_default_get_send(struct mptcp_sock *msk, return 0; } -static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk, - struct mptcp_sched_data *data) +static int mptcp_sched_default_get_retrans(struct mptcp_sock *msk) { struct sock *ssk; @@ -84,10 +82,23 @@ void mptcp_get_available_schedulers(char *buf, size_t maxlen) rcu_read_unlock(); } -int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +int mptcp_validate_scheduler(struct mptcp_sched_ops *sched) { - if (!sched->get_send) + if (!sched->get_send) { + pr_err("%s does not implement required ops\n", sched->name); return -EINVAL; + } + + return 0; +} + +int mptcp_register_scheduler(struct mptcp_sched_ops *sched) +{ + int ret; + + ret = mptcp_validate_scheduler(sched); + if (ret) + return ret; spin_lock(&mptcp_sched_list_lock); if (mptcp_sched_find(sched->name)) { @@ -157,7 +168,6 @@ void mptcp_subflow_set_scheduled(struct mptcp_subflow_context *subflow, int mptcp_sched_get_send(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct mptcp_sched_data *data = NULL; msk_owned_by_me(msk); @@ -178,14 +188,13 @@ int mptcp_sched_get_send(struct mptcp_sock *msk) } if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_send(msk, data); - return msk->sched->get_send(msk, data); + return mptcp_sched_default_get_send(msk); + return msk->sched->get_send(msk); } int mptcp_sched_get_retrans(struct mptcp_sock *msk) { struct mptcp_subflow_context *subflow; - struct mptcp_sched_data *data = NULL; msk_owned_by_me(msk); @@ -199,8 +208,8 @@ int mptcp_sched_get_retrans(struct mptcp_sock *msk) } if (msk->sched == &mptcp_sched_default || !msk->sched) - return mptcp_sched_default_get_retrans(msk, data); + return mptcp_sched_default_get_retrans(msk); if (msk->sched->get_retrans) - return msk->sched->get_retrans(msk, data); - return msk->sched->get_send(msk, data); + return msk->sched->get_retrans(msk); + return msk->sched->get_send(msk); } diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 3caa0a9d3b38..2c267aff95be 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -798,6 +798,23 @@ unlock: return ret; } +static int mptcp_setsockopt_all_sf(struct mptcp_sock *msk, int level, + int optname, sockptr_t optval, + unsigned int optlen) +{ + struct mptcp_subflow_context *subflow; + int ret = 0; + + mptcp_for_each_subflow(msk, subflow) { + struct sock *ssk = mptcp_subflow_tcp_sock(subflow); + + ret = tcp_setsockopt(ssk, level, optname, optval, optlen); + if (ret) + break; + } + return ret; +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -859,6 +876,11 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, &msk->keepalive_cnt, val); break; + case TCP_MAXSEG: + msk->maxseg = val; + ret = mptcp_setsockopt_all_sf(msk, SOL_TCP, optname, optval, + optlen); + break; default: ret = -ENOPROTOOPT; } @@ -914,10 +936,8 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int lock_sock(sk); ssk = msk->first; - if (ssk) { - ret = tcp_getsockopt(ssk, level, optname, optval, optlen); - goto out; - } + if (ssk) + goto get; ssk = __mptcp_nmpc_sk(msk); if (IS_ERR(ssk)) { @@ -925,6 +945,7 @@ static int mptcp_getsockopt_first_sf_only(struct mptcp_sock *msk, int level, int goto out; } +get: ret = tcp_getsockopt(ssk, level, optname, optval, optlen); out: @@ -1407,6 +1428,9 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_put_int_option(msk, optval, optlen, msk->notsent_lowat); case TCP_IS_MPTCP: return mptcp_put_int_option(msk, optval, optlen, 1); + case TCP_MAXSEG: + return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, + optval, optlen); } return -EOPNOTSUPP; } @@ -1553,6 +1577,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) tcp_sock_set_keepidle_locked(ssk, msk->keepalive_idle); tcp_sock_set_keepintvl(ssk, msk->keepalive_intvl); tcp_sock_set_keepcnt(ssk, msk->keepalive_cnt); + tcp_sock_set_maxseg(ssk, msk->maxseg); inet_assign_bit(TRANSPARENT, ssk, inet_test_bit(TRANSPARENT, sk)); inet_assign_bit(FREEBIND, ssk, inet_test_bit(FREEBIND, sk)); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 24c2de1891bd..3f1b62a9fe88 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -247,6 +247,7 @@ again: if (unlikely(req->syncookie)) { if (!mptcp_can_accept_new_subflow(subflow_req->msk)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); return -EPERM; } @@ -543,10 +544,13 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) mptcp_get_options(skb, &mp_opt); if (subflow->request_mptcp) { if (!(mp_opt.suboptions & OPTION_MPTCP_MPC_SYNACK)) { - MPTCP_INC_STATS(sock_net(sk), - MPTCP_MIB_MPCAPABLEACTIVEFALLBACK); - mptcp_do_fallback(sk); - pr_fallback(msk); + if (!mptcp_try_fallback(sk, + MPTCP_MIB_MPCAPABLEACTIVEFALLBACK)) { + MPTCP_INC_STATS(sock_net(sk), + MPTCP_MIB_FALLBACKFAILED); + goto do_reset; + } + goto fallback; } @@ -745,15 +749,11 @@ struct request_sock *mptcp_subflow_reqsk_alloc(const struct request_sock_ops *op EXPORT_SYMBOL(mptcp_subflow_reqsk_alloc); /* validate hmac received in third ACK */ -static bool subflow_hmac_valid(const struct request_sock *req, +static bool subflow_hmac_valid(const struct mptcp_subflow_request_sock *subflow_req, const struct mptcp_options_received *mp_opt) { - const struct mptcp_subflow_request_sock *subflow_req; + struct mptcp_sock *msk = subflow_req->msk; u8 hmac[SHA256_DIGEST_SIZE]; - struct mptcp_sock *msk; - - subflow_req = mptcp_subflow_rsk(req); - msk = subflow_req->msk; subflow_generate_hmac(READ_ONCE(msk->remote_key), READ_ONCE(msk->local_key), @@ -899,13 +899,14 @@ create_child: goto dispose_child; } - if (!subflow_hmac_valid(req, &mp_opt)) { + if (!subflow_hmac_valid(subflow_req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } if (!mptcp_can_accept_new_subflow(owner)) { + SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINREJECTED); subflow_add_reset_reason(skb, MPTCP_RST_EPROHIBIT); goto dispose_child; } @@ -1302,20 +1303,29 @@ static void subflow_sched_work_if_closed(struct mptcp_sock *msk, struct sock *ss mptcp_schedule_work(sk); } -static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) +static bool mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); unsigned long fail_tout; + /* we are really failing, prevent any later subflow join */ + spin_lock_bh(&msk->fallback_lock); + if (!msk->allow_infinite_fallback) { + spin_unlock_bh(&msk->fallback_lock); + return false; + } + msk->allow_subflows = false; + spin_unlock_bh(&msk->fallback_lock); + /* graceful failure can happen only on the MPC subflow */ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first))) - return; + return false; /* since the close timeout take precedence on the fail one, * no need to start the latter when the first is already set */ if (sock_flag((struct sock *)msk, SOCK_DEAD)) - return; + return true; /* we don't need extreme accuracy here, use a zero fail_tout as special * value meaning no fail timeout at all; @@ -1327,6 +1337,7 @@ static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk) tcp_send_ack(ssk); mptcp_reset_tout_timer(msk, subflow->fail_tout); + return true; } static bool subflow_check_data_avail(struct sock *ssk) @@ -1387,17 +1398,16 @@ fallback: (subflow->mp_join || subflow->valid_csum_seen)) { subflow->send_mp_fail = 1; - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_subflow_fail(msk, ssk)) { subflow->reset_transient = 0; subflow->reset_reason = MPTCP_RST_EMIDDLEBOX; goto reset; } - mptcp_subflow_fail(msk, ssk); WRITE_ONCE(subflow->data_avail, true); return true; } - if (!READ_ONCE(msk->allow_infinite_fallback)) { + if (!mptcp_try_fallback(ssk, MPTCP_MIB_DSSFALLBACK)) { /* fatal protocol error, close the socket. * subflow_error_report() will introduce the appropriate barriers */ @@ -1415,8 +1425,6 @@ reset: WRITE_ONCE(subflow->data_avail, false); return false; } - - mptcp_do_fallback(ssk); } skb = skb_peek(&ssk->sk_receive_queue); @@ -1681,7 +1689,6 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_pm_local *local, /* discard the subflow socket */ mptcp_sock_graft(ssk, sk->sk_socket); iput(SOCK_INODE(sf)); - WRITE_ONCE(msk->allow_infinite_fallback, false); mptcp_stop_tout_timer(sk); return 0; @@ -1847,14 +1854,11 @@ static void subflow_state_change(struct sock *sk) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct sock *parent = subflow->conn; - struct mptcp_sock *msk; __subflow_state_change(sk); - msk = mptcp_sk(parent); if (subflow_simultaneous_connect(sk)) { - mptcp_do_fallback(sk); - pr_fallback(msk); + WARN_ON_ONCE(!mptcp_try_fallback(sk, MPTCP_MIB_SIMULTCONNFALLBACK)); subflow->conn_finished = 1; mptcp_propagate_state(parent, sk, subflow, NULL); } |