diff options
Diffstat (limited to 'net/mptcp/options.c')
| -rw-r--r-- | net/mptcp/options.c | 238 |
1 files changed, 158 insertions, 80 deletions
diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 5ded85e2c374..f24ae7d40e88 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -116,51 +116,46 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_CSUMREQD; ptr += 2; } - pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u", + pr_debug("MP_CAPABLE version=%x, flags=%x, optlen=%d sndr=%llu, rcvr=%llu len=%d csum=%u\n", version, flags, opsize, mp_opt->sndr_key, mp_opt->rcvr_key, mp_opt->data_len, mp_opt->csum); break; case MPTCPOPT_MP_JOIN: - mp_opt->suboptions |= OPTIONS_MPTCP_MPJ; if (opsize == TCPOLEN_MPTCP_MPJ_SYN) { + mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYN; mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; mp_opt->token = get_unaligned_be32(ptr); ptr += 4; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; - pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u", + pr_debug("MP_JOIN bkup=%u, id=%u, token=%u, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->token, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_SYNACK) { + mp_opt->suboptions |= OPTION_MPTCP_MPJ_SYNACK; mp_opt->backup = *ptr++ & MPTCPOPT_BACKUP; mp_opt->join_id = *ptr++; mp_opt->thmac = get_unaligned_be64(ptr); ptr += 8; mp_opt->nonce = get_unaligned_be32(ptr); ptr += 4; - pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u", + pr_debug("MP_JOIN bkup=%u, id=%u, thmac=%llu, nonce=%u\n", mp_opt->backup, mp_opt->join_id, mp_opt->thmac, mp_opt->nonce); } else if (opsize == TCPOLEN_MPTCP_MPJ_ACK) { + mp_opt->suboptions |= OPTION_MPTCP_MPJ_ACK; ptr += 2; memcpy(mp_opt->hmac, ptr, MPTCPOPT_HMAC_LEN); - pr_debug("MP_JOIN hmac"); - } else { - mp_opt->suboptions &= ~OPTIONS_MPTCP_MPJ; + pr_debug("MP_JOIN hmac\n"); } break; case MPTCPOPT_DSS: - pr_debug("DSS"); + pr_debug("DSS\n"); ptr++; - /* we must clear 'mpc_map' be able to detect MP_CAPABLE - * map vs DSS map in mptcp_incoming_options(), and reconstruct - * map info accordingly - */ - mp_opt->mpc_map = 0; flags = (*ptr++) & MPTCP_DSS_FLAG_MASK; mp_opt->data_fin = (flags & MPTCP_DSS_DATA_FIN) != 0; mp_opt->dsn64 = (flags & MPTCP_DSS_DSN64) != 0; @@ -168,7 +163,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ack64 = (flags & MPTCP_DSS_ACK64) != 0; mp_opt->use_ack = (flags & MPTCP_DSS_HAS_ACK); - pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d", + pr_debug("data_fin=%d dsn64=%d use_map=%d ack64=%d use_ack=%d\n", mp_opt->data_fin, mp_opt->dsn64, mp_opt->use_map, mp_opt->ack64, mp_opt->use_ack); @@ -206,7 +201,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 4; } - pr_debug("data_ack=%llu", mp_opt->data_ack); + pr_debug("data_ack=%llu\n", mp_opt->data_ack); } if (mp_opt->use_map) { @@ -230,7 +225,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; } - pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u", + pr_debug("data_seq=%llu subflow_seq=%u data_len=%u csum=%d:%u\n", mp_opt->data_seq, mp_opt->subflow_seq, mp_opt->data_len, !!(mp_opt->suboptions & OPTION_MPTCP_CSUMREQD), mp_opt->csum); @@ -292,7 +287,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->ahmac = get_unaligned_be64(ptr); ptr += 8; } - pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d", + pr_debug("ADD_ADDR%s: id=%d, ahmac=%llu, echo=%d, port=%d\n", (mp_opt->addr.family == AF_INET6) ? "6" : "", mp_opt->addr.id, mp_opt->ahmac, mp_opt->echo, ntohs(mp_opt->addr.port)); break; @@ -308,7 +303,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rm_list.nr = opsize - TCPOLEN_MPTCP_RM_ADDR_BASE; for (i = 0; i < mp_opt->rm_list.nr; i++) mp_opt->rm_list.ids[i] = *ptr++; - pr_debug("RM_ADDR: rm_list_nr=%d", mp_opt->rm_list.nr); + pr_debug("RM_ADDR: rm_list_nr=%d\n", mp_opt->rm_list.nr); break; case MPTCPOPT_MP_PRIO: @@ -317,7 +312,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->suboptions |= OPTION_MPTCP_PRIO; mp_opt->backup = *ptr++ & MPTCP_PRIO_BKUP; - pr_debug("MP_PRIO: prio=%d", mp_opt->backup); + pr_debug("MP_PRIO: prio=%d\n", mp_opt->backup); break; case MPTCPOPT_MP_FASTCLOSE: @@ -328,7 +323,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, mp_opt->rcvr_key = get_unaligned_be64(ptr); ptr += 8; mp_opt->suboptions |= OPTION_MPTCP_FASTCLOSE; - pr_debug("MP_FASTCLOSE: recv_key=%llu", mp_opt->rcvr_key); + pr_debug("MP_FASTCLOSE: recv_key=%llu\n", mp_opt->rcvr_key); break; case MPTCPOPT_RST: @@ -342,7 +337,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, flags = *ptr++; mp_opt->reset_transient = flags & MPTCP_RST_TRANSIENT; mp_opt->reset_reason = *ptr; - pr_debug("MP_RST: transient=%u reason=%u", + pr_debug("MP_RST: transient=%u reason=%u\n", mp_opt->reset_transient, mp_opt->reset_reason); break; @@ -353,7 +348,7 @@ static void mptcp_parse_option(const struct sk_buff *skb, ptr += 2; mp_opt->suboptions |= OPTION_MPTCP_FAIL; mp_opt->fail_seq = get_unaligned_be64(ptr); - pr_debug("MP_FAIL: data_seq=%llu", mp_opt->fail_seq); + pr_debug("MP_FAIL: data_seq=%llu\n", mp_opt->fail_seq); break; default: @@ -368,8 +363,11 @@ void mptcp_get_options(const struct sk_buff *skb, const unsigned char *ptr; int length; - /* initialize option status */ - mp_opt->suboptions = 0; + /* Ensure that casting the whole status to u32 is efficient and safe */ + BUILD_BUG_ON(sizeof_field(struct mptcp_options_received, status) != sizeof(u32)); + BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct mptcp_options_received, status), + sizeof(u32))); + *(u32 *)&mp_opt->status = 0; length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); @@ -416,7 +414,7 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, *size = TCPOLEN_MPTCP_MPC_SYN; return true; } else if (subflow->request_join) { - pr_debug("remote_token=%u, nonce=%u", subflow->remote_token, + pr_debug("remote_token=%u, nonce=%u\n", subflow->remote_token, subflow->local_nonce); opts->suboptions = OPTION_MPTCP_MPJ_SYN; opts->join_id = subflow->local_id; @@ -434,7 +432,6 @@ static void clear_3rdack_retransmission(struct sock *sk) struct inet_connection_sock *icsk = inet_csk(sk); sk_stop_timer(sk, &icsk->icsk_delack_timer); - icsk->icsk_ack.timeout = 0; icsk->icsk_ack.ato = 0; icsk->icsk_ack.pending &= ~(ICSK_ACK_SCHED | ICSK_ACK_TIMER); } @@ -442,7 +439,6 @@ static void clear_3rdack_retransmission(struct sock *sk) static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, bool snd_data_fin_enable, unsigned int *size, - unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -461,7 +457,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, return false; /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */ - if (subflow->fully_established || snd_data_fin_enable || + if (READ_ONCE(subflow->fully_established) || snd_data_fin_enable || subflow->snd_isn != TCP_SKB_CB(skb)->seq || sk->sk_state != TCP_ESTABLISHED) return false; @@ -500,7 +496,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, *size = TCPOLEN_MPTCP_MPC_ACK; } - pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d", + pr_debug("subflow=%p, local_key=%llu, remote_key=%llu map_len=%d\n", subflow, subflow->local_key, subflow->remote_key, data_len); @@ -509,7 +505,7 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb, opts->suboptions = OPTION_MPTCP_MPJ_ACK; memcpy(opts->hmac, subflow->hmac, MPTCPOPT_HMAC_LEN); *size = TCPOLEN_MPTCP_MPJ_ACK; - pr_debug("subflow=%p", subflow); + pr_debug("subflow=%p\n", subflow); /* we can use the full delegate action helper only from BH context * If we are in process context - sk is flushing the backlog at @@ -556,7 +552,6 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow, static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, bool snd_data_fin_enable, unsigned int *size, - unsigned int remaining, struct mptcp_out_options *opts) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -580,7 +575,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, opts->ext_copy = *mpext; } - remaining -= map_size; dss_size = map_size; if (skb && snd_data_fin_enable) mptcp_write_data_fin(subflow, skb, &opts->ext_copy); @@ -609,7 +603,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, } opts->ext_copy.use_ack = 1; opts->suboptions = OPTION_MPTCP_DSS; - WRITE_ONCE(msk->old_wspace, __mptcp_space((struct sock *)msk)); /* Add kind/length/subtype/flag overhead if mapping is not populated */ if (dss_size == 0) @@ -657,6 +650,7 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * struct mptcp_sock *msk = mptcp_sk(subflow->conn); bool drop_other_suboptions = false; unsigned int opt_size = *size; + struct mptcp_addr_info addr; bool echo; int len; @@ -665,19 +659,26 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * */ if (!mptcp_pm_should_add_signal(msk) || (opts->suboptions & (OPTION_MPTCP_MPJ_ACK | OPTION_MPTCP_MPC_ACK)) || - !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &opts->addr, + !mptcp_pm_add_addr_signal(msk, skb, opt_size, remaining, &addr, &echo, &drop_other_suboptions)) return false; + /* + * Later on, mptcp_write_options() will enforce mutually exclusion with + * DSS, bail out if such option is set and we can't drop it. + */ if (drop_other_suboptions) remaining += opt_size; - len = mptcp_add_addr_len(opts->addr.family, echo, !!opts->addr.port); + else if (opts->suboptions & OPTION_MPTCP_DSS) + return false; + + len = mptcp_add_addr_len(addr.family, echo, !!addr.port); if (remaining < len) return false; *size = len; if (drop_other_suboptions) { - pr_debug("drop other suboptions"); + pr_debug("drop other suboptions\n"); opts->suboptions = 0; /* note that e.g. DSS could have written into the memory @@ -688,13 +689,17 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * opts->ahmac = 0; *size -= opt_size; } + opts->addr = addr; opts->suboptions |= OPTION_MPTCP_ADD_ADDR; if (!echo) { - opts->ahmac = add_addr_generate_hmac(msk->local_key, - msk->remote_key, + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ADDADDRTX); + opts->ahmac = add_addr_generate_hmac(READ_ONCE(msk->local_key), + READ_ONCE(msk->remote_key), &opts->addr); + } else { + MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_ECHOADDTX); } - pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d", + pr_debug("addr_id=%d, ahmac=%llu, echo=%d, port=%d\n", opts->addr.id, opts->ahmac, echo, ntohs(opts->addr.port)); return true; @@ -725,8 +730,8 @@ static bool mptcp_established_options_rm_addr(struct sock *sk, opts->rm_list = rm_list; for (i = 0; i < opts->rm_list.nr; i++) - pr_debug("rm_list_ids[%d]=%d", i, opts->rm_list.ids[i]); - + pr_debug("rm_list_ids[%d]=%d\n", i, opts->rm_list.ids[i]); + MPTCP_ADD_STATS(sock_net(sk), MPTCP_MIB_RMADDRTX, opts->rm_list.nr); return true; } @@ -751,7 +756,7 @@ static bool mptcp_established_options_mp_prio(struct sock *sk, opts->suboptions |= OPTION_MPTCP_PRIO; opts->backup = subflow->request_bkup; - pr_debug("prio=%d", opts->backup); + pr_debug("prio=%d\n", opts->backup); return true; } @@ -791,9 +796,9 @@ static bool mptcp_established_options_fastclose(struct sock *sk, *size = TCPOLEN_MPTCP_FASTCLOSE; opts->suboptions |= OPTION_MPTCP_FASTCLOSE; - opts->rcvr_key = msk->remote_key; + opts->rcvr_key = READ_ONCE(msk->remote_key); - pr_debug("FASTCLOSE key=%llu", opts->rcvr_key); + pr_debug("FASTCLOSE key=%llu\n", opts->rcvr_key); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX); return true; } @@ -815,7 +820,7 @@ static bool mptcp_established_options_mp_fail(struct sock *sk, opts->suboptions |= OPTION_MPTCP_FAIL; opts->fail_seq = subflow->map_seq; - pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq); + pr_debug("MP_FAIL fail_seq=%llu\n", opts->fail_seq); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX); return true; @@ -833,8 +838,11 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, opts->suboptions = 0; + /* Force later mptcp_write_options(), but do not use any actual + * option space. + */ if (unlikely(__mptcp_check_fallback(msk) && !mptcp_check_infinite_map(skb))) - return false; + return true; if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST)) { if (mptcp_established_options_fastclose(sk, &opt_size, remaining, opts) || @@ -851,9 +859,9 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb, } snd_data_fin = mptcp_data_fin_enabled(msk); - if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts)) + if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, opts)) ret = true; - else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts)) { + else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, opts)) { unsigned int mp_fail_size; ret = true; @@ -903,16 +911,16 @@ bool mptcp_synack_options(const struct request_sock *req, unsigned int *size, opts->csum_reqd = subflow_req->csum_reqd; opts->allow_join_id0 = subflow_req->allow_join_id0; *size = TCPOLEN_MPTCP_MPC_SYNACK; - pr_debug("subflow_req=%p, local_key=%llu", + pr_debug("subflow_req=%p, local_key=%llu\n", subflow_req, subflow_req->local_key); return true; } else if (subflow_req->mp_join) { opts->suboptions = OPTION_MPTCP_MPJ_SYNACK; - opts->backup = subflow_req->backup; + opts->backup = subflow_req->request_bkup; opts->join_id = subflow_req->local_id; opts->thmac = subflow_req->thmac; opts->nonce = subflow_req->local_nonce; - pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u", + pr_debug("req=%p, bkup=%u, id=%u, thmac=%llu, nonce=%u\n", subflow_req, opts->backup, opts->join_id, opts->thmac, opts->nonce); *size = TCPOLEN_MPTCP_MPJ_SYNACK; @@ -929,7 +937,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, /* here we can process OoO, in-window pkts, only in-sequence 4th ack * will make the subflow fully established */ - if (likely(subflow->fully_established)) { + if (likely(READ_ONCE(subflow->fully_established))) { /* on passive sockets, check for 3rd ack retransmission * note that msk is always set by subflow_syn_recv_sock() * for mp_join subflows @@ -957,13 +965,12 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->remote_key_valid && (((mp_opt->suboptions & OPTION_MPTCP_DSS) && mp_opt->use_ack) || - ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && !mp_opt->echo))) { + ((mp_opt->suboptions & OPTION_MPTCP_ADD_ADDR) && + (!mp_opt->echo || subflow->mp_join)))) { /* subflows are fully established as soon as we get any * additional ack, including ADD_ADDR. */ - subflow->fully_established = 1; - WRITE_ONCE(msk->fully_established, true); - goto check_notify; + goto set_fully_established; } /* If the first established packet does not contain MP_CAPABLE + data @@ -974,18 +981,23 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *ssk, if (subflow->mp_join) goto reset; subflow->mp_capable = 0; - pr_fallback(msk); - mptcp_do_fallback(ssk); + if (!mptcp_try_fallback(ssk, MPTCP_MIB_MPCAPABLEDATAFALLBACK)) { + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_FALLBACKFAILED); + goto reset; + } return false; } + if (unlikely(!READ_ONCE(msk->pm.server_side))) + pr_warn_once("bogus mpc option on established client sk"); + +set_fully_established: if (mp_opt->deny_join_id0) WRITE_ONCE(msk->pm.remote_deny_join_id0, true); -set_fully_established: - if (unlikely(!READ_ONCE(msk->pm.server_side))) - pr_warn_once("bogus mpc option on established client sk"); - mptcp_subflow_fully_established(subflow, mp_opt); + mptcp_data_lock((struct sock *)msk); + __mptcp_subflow_fully_established(msk, subflow, mp_opt); + mptcp_data_unlock((struct sock *)msk); check_notify: /* if the subflow is not already linked into the conn_list, we can't @@ -1001,7 +1013,7 @@ check_notify: clear_3rdack_retransmission(ssk); mptcp_pm_subflow_established(msk); } else { - mptcp_pm_fully_established(msk, ssk, GFP_ATOMIC); + mptcp_pm_fully_established(msk, ssk); } return true; @@ -1026,6 +1038,37 @@ u64 __mptcp_expand_seq(u64 old_seq, u64 cur_seq) return cur_seq; } +static void __mptcp_snd_una_update(struct mptcp_sock *msk, u64 new_snd_una) +{ + msk->bytes_acked += new_snd_una - msk->snd_una; + WRITE_ONCE(msk->snd_una, new_snd_una); +} + +static void rwin_update(struct mptcp_sock *msk, struct sock *ssk, + struct sk_buff *skb) +{ + struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk); + struct tcp_sock *tp = tcp_sk(ssk); + u64 mptcp_rcv_wnd; + + /* Avoid touching extra cachelines if TCP is going to accept this + * skb without filling the TCP-level window even with a possibly + * outdated mptcp-level rwin. + */ + if (!skb->len || skb->len < tcp_receive_window(tp)) + return; + + mptcp_rcv_wnd = atomic64_read(&msk->rcv_wnd_sent); + if (!after64(mptcp_rcv_wnd, subflow->rcv_wnd_sent)) + return; + + /* Some other subflow grew the mptcp-level rwin since rcv_wup, + * resync. + */ + tp->rcv_wnd += mptcp_rcv_wnd - subflow->rcv_wnd_sent; + subflow->rcv_wnd_sent = mptcp_rcv_wnd; +} + static void ack_update_msk(struct mptcp_sock *msk, struct sock *ssk, struct mptcp_options_received *mp_opt) @@ -1050,21 +1093,22 @@ static void ack_update_msk(struct mptcp_sock *msk, new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; if (after64(new_wnd_end, msk->wnd_end)) - msk->wnd_end = new_wnd_end; + WRITE_ONCE(msk->wnd_end, new_wnd_end); /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ - if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt))) + if (after64(msk->wnd_end, snd_nxt)) __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { - msk->snd_una = new_snd_una; + __mptcp_snd_una_update(msk, new_snd_una); __mptcp_data_acked(sk); } + msk->last_ack_recv = tcp_jiffies32; mptcp_data_unlock(sk); trace_ack_update_msk(mp_opt->data_ack, old_snd_una, new_snd_una, - new_wnd_end, msk->wnd_end); + new_wnd_end, READ_ONCE(msk->wnd_end)); } bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit) @@ -1092,8 +1136,8 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, if (mp_opt->echo) return true; - hmac = add_addr_generate_hmac(msk->remote_key, - msk->local_key, + hmac = add_addr_generate_hmac(READ_ONCE(msk->remote_key), + READ_ONCE(msk->local_key), &mp_opt->addr); pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n", @@ -1102,7 +1146,9 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk, return hmac == mp_opt->ahmac; } -/* Return false if a subflow has been reset, else return true */ +/* Return false in case of error (or subflow has been reset), + * else return true. + */ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); @@ -1119,6 +1165,12 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) mptcp_data_lock(subflow->conn); if (sk_stream_memory_free(sk)) __mptcp_check_push(subflow->conn, sk); + + /* on fallback we just need to ignore the msk-level snd_una, as + * this is really plain TCP + */ + __mptcp_snd_una_update(msk, READ_ONCE(msk->snd_nxt)); + __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return true; @@ -1134,7 +1186,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) if (unlikely(mp_opt.suboptions != OPTION_MPTCP_DSS)) { if ((mp_opt.suboptions & OPTION_MPTCP_FASTCLOSE) && - msk->local_key == mp_opt.rcvr_key) { + READ_ONCE(msk->local_key) == mp_opt.rcvr_key) { WRITE_ONCE(msk->rcv_fastclose, true); mptcp_schedule_work((struct sock *)msk); MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSERX); @@ -1184,6 +1236,7 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) */ if (mp_opt.use_ack) ack_update_msk(msk, sk, &mp_opt); + rwin_update(msk, sk, skb); /* Zero-data-length packets are dropped by the caller and not * propagated to the MPTCP layer, so the skb extension does not @@ -1192,16 +1245,15 @@ bool mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) */ if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { if (mp_opt.data_fin && mp_opt.data_len == 1 && - mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64) && - schedule_work(&msk->work)) - sock_hold(subflow->conn); + mptcp_update_rcv_data_fin(msk, mp_opt.data_seq, mp_opt.dsn64)) + mptcp_schedule_work((struct sock *)msk); return true; } mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) - return true; + return false; memset(mpext, 0, sizeof(*mpext)); @@ -1258,18 +1310,23 @@ static void mptcp_set_rwin(struct tcp_sock *tp, struct tcphdr *th) if (rcv_wnd == rcv_wnd_old) break; - if (before64(rcv_wnd_new, rcv_wnd)) { + + rcv_wnd_old = rcv_wnd; + if (before64(rcv_wnd_new, rcv_wnd_old)) { MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICTUPDATE); goto raise_win; } MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDCONFLICT); - rcv_wnd_old = rcv_wnd; } - return; + goto update_wspace; } if (rcv_wnd_new != rcv_wnd_old) { raise_win: + /* The msk-level rcv wnd is after the tcp level one, + * sync the latter. + */ + rcv_wnd_new = rcv_wnd_old; win = rcv_wnd_old - ack_seq; tp->rcv_wnd = min_t(u64, win, U32_MAX); new_win = tp->rcv_wnd; @@ -1290,6 +1347,24 @@ raise_win: th->window = htons(new_win); MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_RCVWNDSHARED); } + +update_wspace: + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); + subflow->rcv_wnd_sent = rcv_wnd_new; +} + +static void mptcp_track_rwin(struct tcp_sock *tp) +{ + const struct sock *ssk = (const struct sock *)tp; + struct mptcp_subflow_context *subflow; + struct mptcp_sock *msk; + + if (!ssk) + return; + + subflow = mptcp_subflow_ctx(ssk); + msk = mptcp_sk(subflow->conn); + WRITE_ONCE(msk->old_wspace, tp->rcv_wnd); } __sum16 __mptcp_make_csum(u64 data_seq, u32 subflow_seq, u16 data_len, __wsum sum) @@ -1584,6 +1659,10 @@ mp_rst: opts->reset_transient, opts->reset_reason); return; + } else if (unlikely(!opts->suboptions)) { + /* Fallback to TCP */ + mptcp_track_rwin(tp); + return; } if (OPTION_MPTCP_PRIO & opts->suboptions) { @@ -1594,8 +1673,7 @@ mp_rst: TCPOLEN_MPTCP_PRIO, opts->backup, TCPOPT_NOP); - MPTCP_INC_STATS(sock_net((const struct sock *)tp), - MPTCP_MIB_MPPRIOTX); + MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX); } mp_capable_done: |
