From df1036da90108b1a9969721beab34f4c76228bcc Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Apr 2020 09:28:22 +0200 Subject: mptcp: fix splat when incoming connection is never accepted before exit/close Following snippet (replicated from syzkaller reproducer) generates warning: "IPv4: Attempt to release TCP socket in state 1". int main(void) { struct sockaddr_in sin1 = { .sin_family = 2, .sin_port = 0x4e20, .sin_addr.s_addr = 0x010000e0, }; struct sockaddr_in sin2 = { .sin_family = 2, .sin_addr.s_addr = 0x0100007f, }; struct sockaddr_in sin3 = { .sin_family = 2, .sin_port = 0x4e20, .sin_addr.s_addr = 0x0100007f, }; int r0 = socket(0x2, 0x1, 0x106); int r1 = socket(0x2, 0x1, 0x106); bind(r1, (void *)&sin1, sizeof(sin1)); connect(r1, (void *)&sin2, sizeof(sin2)); listen(r1, 3); return connect(r0, (void *)&sin3, 0x4d); } Reason is that the newly generated mptcp socket is closed via the ulp release of the tcp listener socket when its accept backlog gets purged. To fix this, delay setting the ESTABLISHED state until after userspace calls accept and via mptcp specific destructor. Fixes: 58b09919626bf ("mptcp: create msk early") Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/9 Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 1 + net/mptcp/subflow.c | 25 ++++++++++++++++++++++++- 2 files changed, 25 insertions(+), 1 deletion(-) (limited to 'net/mptcp') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 9936e33ac351..1c8b021b4537 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1431,6 +1431,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err, newsk = new_mptcp_sock; mptcp_copy_inaddrs(newsk, ssk); list_add(&subflow->node, &msk->conn_list); + inet_sk_state_store(newsk, TCP_ESTABLISHED); bh_unlock_sock(new_mptcp_sock); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 50a8bea987c6..57a836fe4988 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -347,6 +347,29 @@ static bool subflow_hmac_valid(const struct request_sock *req, return ret; } +static void mptcp_sock_destruct(struct sock *sk) +{ + /* if new mptcp socket isn't accepted, it is free'd + * from the tcp listener sockets request queue, linked + * from req->sk. The tcp socket is released. + * This calls the ULP release function which will + * also remove the mptcp socket, via + * sock_put(ctx->conn). + * + * Problem is that the mptcp socket will not be in + * SYN_RECV state and doesn't have SOCK_DEAD flag. + * Both result in warnings from inet_sock_destruct. + */ + + if (sk->sk_state == TCP_SYN_RECV) { + sk->sk_state = TCP_CLOSE; + WARN_ON_ONCE(sk->sk_socket); + sock_orphan(sk); + } + + inet_sock_destruct(sk); +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -422,7 +445,7 @@ create_child: /* new mpc subflow takes ownership of the newly * created mptcp socket */ - inet_sk_state_store(new_msk, TCP_ESTABLISHED); + new_msk->sk_destruct = mptcp_sock_destruct; mptcp_pm_new_connection(mptcp_sk(new_msk), 1); ctx->conn = new_msk; new_msk = NULL; -- cgit From 9f5ca6a59816b406230adc440b6bb684fda90abe Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 17 Apr 2020 09:28:23 +0200 Subject: mptcp: fix 'Attempt to release TCP socket in state' warnings We need to set sk_state to CLOSED, else we will get following: IPv4: Attempt to release TCP socket in state 3 00000000b95f109e IPv4: Attempt to release TCP socket in state 10 00000000b95f109e First one is from inet_sock_destruct(), second one from mptcp_sk_clone failure handling. Setting sk_state to CLOSED isn't enough, we also need to orphan sk so it has DEAD flag set. Otherwise, a very similar warning is printed from inet_sock_destruct(). Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 7 +++++-- net/mptcp/subflow.c | 8 +++++++- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 1c8b021b4537..7e816c733ccb 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1355,12 +1355,15 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->subflow = NULL; if (unlikely(mptcp_token_new_accept(subflow_req->token, nsk))) { + nsk->sk_state = TCP_CLOSE; bh_unlock_sock(nsk); /* we can't call into mptcp_close() here - possible BH context - * free the sock directly + * free the sock directly. + * sk_clone_lock() sets nsk refcnt to two, hence call sk_free() + * too. */ - nsk->sk_prot->destroy(nsk); + sk_common_release(nsk); sk_free(nsk); return NULL; } diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 57a836fe4988..bc46b5091b9d 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -370,6 +370,12 @@ static void mptcp_sock_destruct(struct sock *sk) inet_sock_destruct(sk); } +static void mptcp_force_close(struct sock *sk) +{ + inet_sk_state_store(sk, TCP_CLOSE); + sk_common_release(sk); +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -467,7 +473,7 @@ create_child: out: /* dispose of the left over mptcp master, if any */ if (unlikely(new_msk)) - sock_put(new_msk); + mptcp_force_close(new_msk); return child; close_child: -- cgit From 5e20087d1b678965ae9df01eed03efedc1aef9f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 20 Apr 2020 16:25:04 +0200 Subject: mptcp: handle mptcp listener destruction via rcu Following splat can occur during self test: BUG: KASAN: use-after-free in subflow_data_ready+0x156/0x160 Read of size 8 at addr ffff888100c35c28 by task mptcp_connect/4808 subflow_data_ready+0x156/0x160 tcp_child_process+0x6a3/0xb30 tcp_v4_rcv+0x2231/0x3730 ip_protocol_deliver_rcu+0x5c/0x860 ip_local_deliver_finish+0x220/0x360 ip_local_deliver+0x1c8/0x4e0 ip_rcv_finish+0x1da/0x2f0 ip_rcv+0xd0/0x3c0 __netif_receive_skb_one_core+0xf5/0x160 __netif_receive_skb+0x27/0x1c0 process_backlog+0x21e/0x780 net_rx_action+0x35f/0xe90 do_softirq+0x4c/0x50 [..] This occurs when accessing subflow_ctx->conn. Problem is that tcp_child_process() calls listen sockets' sk_data_ready() notification, but it doesn't hold the listener lock. Another cpu calling close() on the listener will then cause transition of refcount to 0. Fixes: 58b09919626bf ("mptcp: create msk early") Signed-off-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/mptcp') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 7e816c733ccb..b57974a6b6cc 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1378,6 +1378,7 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->ack_seq = ack_seq; } + sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); bh_unlock_sock(nsk); @@ -1779,6 +1780,8 @@ static int mptcp_listen(struct socket *sock, int backlog) goto unlock; } + sock_set_flag(sock->sk, SOCK_RCU_FREE); + err = ssock->ops->listen(ssock, backlog); inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk)); if (!err) -- cgit From 4c8941de781cf71388d1490c6b85a02d1cec1ef4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 20 Apr 2020 16:25:05 +0200 Subject: mptcp: avoid flipping mp_capable field in syn_recv_sock() If multiple CPUs races on the same req_sock in syn_recv_sock(), flipping such field can cause inconsistent child socket status. When racing, the CPU losing the req ownership may still change the mptcp request socket mp_capable flag while the CPU owning the request is cloning the socket, leaving the child socket with 'is_mptcp' set but no 'mp_capable' flag. Such socket will stay with 'conn' field cleared, heading to oops in later mptcp callback. Address the issue tracking the fallback status in a local variable. Fixes: 58b09919626b ("mptcp: create msk early") Co-developed-by: Florian Westphal Signed-off-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/subflow.c | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index bc46b5091b9d..4fa190368507 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -376,6 +376,17 @@ static void mptcp_force_close(struct sock *sk) sk_common_release(sk); } +static void subflow_ulp_fallback(struct sock *sk, + struct mptcp_subflow_context *old_ctx) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + + mptcp_subflow_tcp_fallback(sk, old_ctx); + icsk->icsk_ulp_ops = NULL; + rcu_assign_pointer(icsk->icsk_ulp_data, NULL); + tcp_sk(sk)->is_mptcp = 0; +} + static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, struct request_sock *req, @@ -388,6 +399,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, struct tcp_options_received opt_rx; bool fallback_is_fatal = false; struct sock *new_msk = NULL; + bool fallback = false; struct sock *child; pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); @@ -412,14 +424,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, subflow_req->remote_key = opt_rx.mptcp.sndr_key; subflow_req->remote_key_valid = 1; } else { - subflow_req->mp_capable = 0; + fallback = true; goto create_child; } create_msk: new_msk = mptcp_sk_clone(listener->conn, req); if (!new_msk) - subflow_req->mp_capable = 0; + fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; opt_rx.mptcp.mp_join = 0; @@ -438,12 +450,18 @@ create_child: if (child && *own_req) { struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child); - /* we have null ctx on TCP fallback, which is fatal on - * MPJ handshake + /* we need to fallback on ctx allocation failure and on pre-reqs + * checking above. In the latter scenario we additionally need + * to reset the context to non MPTCP status. */ - if (!ctx) { + if (!ctx || fallback) { if (fallback_is_fatal) goto close_child; + + if (ctx) { + subflow_ulp_fallback(child, ctx); + kfree_rcu(ctx, rcu); + } goto out; } @@ -474,6 +492,13 @@ out: /* dispose of the left over mptcp master, if any */ if (unlikely(new_msk)) mptcp_force_close(new_msk); + + /* check for expected invariant - should never trigger, just help + * catching eariler subtle bugs + */ + WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp && + (!mptcp_subflow_ctx(child) || + !mptcp_subflow_ctx(child)->conn)); return child; close_child: @@ -1076,17 +1101,6 @@ static void subflow_ulp_release(struct sock *sk) kfree_rcu(ctx, rcu); } -static void subflow_ulp_fallback(struct sock *sk, - struct mptcp_subflow_context *old_ctx) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - - mptcp_subflow_tcp_fallback(sk, old_ctx); - icsk->icsk_ulp_ops = NULL; - rcu_assign_pointer(icsk->icsk_ulp_data, NULL); - tcp_sk(sk)->is_mptcp = 0; -} - static void subflow_ulp_clone(const struct request_sock *req, struct sock *newsk, const gfp_t priority) -- cgit From fca5c82c086ea3871b103618b80558c479c8e597 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 20 Apr 2020 16:25:06 +0200 Subject: mptcp: drop req socket remote_key* fields We don't need them, as we can use the current ingress opt data instead. Setting them in syn_recv_sock() may causes inconsistent mptcp socket status, as per previous commit. Fixes: cc7972ea1932 ("mptcp: parse and emit MP_CAPABLE option according to v1 spec") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/protocol.c | 8 +++++--- net/mptcp/protocol.h | 8 ++++---- net/mptcp/subflow.c | 20 ++++++++++---------- 3 files changed, 19 insertions(+), 17 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b57974a6b6cc..b22a63ba2348 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1332,7 +1332,9 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) } #endif -struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) +struct sock *mptcp_sk_clone(const struct sock *sk, + const struct tcp_options_received *opt_rx, + struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC); @@ -1370,9 +1372,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req) msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); - if (subflow_req->remote_key_valid) { + if (opt_rx->mptcp.mp_capable) { msk->can_ack = true; - msk->remote_key = subflow_req->remote_key; + msk->remote_key = opt_rx->mptcp.sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; msk->ack_seq = ack_seq; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 67448002a2d7..a2b3048037d0 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -206,12 +206,10 @@ struct mptcp_subflow_request_sock { struct tcp_request_sock sk; u16 mp_capable : 1, mp_join : 1, - backup : 1, - remote_key_valid : 1; + backup : 1; u8 local_id; u8 remote_id; u64 local_key; - u64 remote_key; u64 idsn; u32 token; u32 ssn_offset; @@ -332,7 +330,9 @@ void mptcp_proto_init(void); int mptcp_proto_v6_init(void); #endif -struct sock *mptcp_sk_clone(const struct sock *sk, struct request_sock *req); +struct sock *mptcp_sk_clone(const struct sock *sk, + const struct tcp_options_received *opt_rx, + struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, struct tcp_options_received *opt_rx); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index 4fa190368507..fabd06f2ff45 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -133,7 +133,6 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 0; subflow_req->mp_join = 0; - subflow_req->remote_key_valid = 0; #ifdef CONFIG_TCP_MD5SIG /* no MPTCP if MD5SIG is enabled on this socket or we may run out of @@ -404,6 +403,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); + opt_rx.mptcp.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -418,18 +418,14 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - opt_rx.mptcp.mp_capable = 0; mptcp_get_options(skb, &opt_rx); - if (opt_rx.mptcp.mp_capable) { - subflow_req->remote_key = opt_rx.mptcp.sndr_key; - subflow_req->remote_key_valid = 1; - } else { + if (!opt_rx.mptcp.mp_capable) { fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, req); + new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req); if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { @@ -473,6 +469,13 @@ create_child: mptcp_pm_new_connection(mptcp_sk(new_msk), 1); ctx->conn = new_msk; new_msk = NULL; + + /* with OoO packets we can reach here without ingress + * mpc option + */ + ctx->remote_key = opt_rx.mptcp.sndr_key; + ctx->fully_established = opt_rx.mptcp.mp_capable; + ctx->can_ack = opt_rx.mptcp.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -1134,9 +1137,6 @@ static void subflow_ulp_clone(const struct request_sock *req, * is fully established only after we receive the remote key */ new_ctx->mp_capable = 1; - new_ctx->fully_established = subflow_req->remote_key_valid; - new_ctx->can_ack = subflow_req->remote_key_valid; - new_ctx->remote_key = subflow_req->remote_key; new_ctx->local_key = subflow_req->local_key; new_ctx->token = subflow_req->token; new_ctx->ssn_offset = subflow_req->ssn_offset; -- cgit From 9a19371bf029d784aa37ee623ce175205f43ccfd Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 22 Apr 2020 18:24:56 +0200 Subject: mptcp: fix data_fin handing in RX path The data fin flag is set only via a DSS option, but mptcp_incoming_options() copies it unconditionally from the provided RX options. Since we do not clear all the mptcp sock RX options in a socket free/alloc cycle, we can end-up with a stray data_fin value while parsing e.g. MPC packets. That would lead to mapping data corruption and will trigger a few WARN_ON() in the RX path. Instead of adding a costly memset(), fetch the data_fin flag only for DSS packets - when we always explicitly initialize such bit at option parsing time. Fixes: 648ef4b88673 ("mptcp: Implement MPTCP receive path") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/options.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index faf57585b892..4a7c467b99db 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -876,12 +876,11 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, mpext->data_seq = mp_opt->data_seq; mpext->subflow_seq = mp_opt->subflow_seq; mpext->dsn64 = mp_opt->dsn64; + mpext->data_fin = mp_opt->data_fin; } mpext->data_len = mp_opt->data_len; mpext->use_map = 1; } - - mpext->data_fin = mp_opt->data_fin; } void mptcp_write_options(__be32 *ptr, struct mptcp_out_options *opts) -- cgit From b4e0f9a926ec557cc0b91216957afd1b711bd45f Mon Sep 17 00:00:00 2001 From: Bo YU Date: Thu, 23 Apr 2020 10:10:03 +0800 Subject: mptcp/pm_netlink.c : add check for nla_put_in/6_addr Normal there should be checked for nla_put_in6_addr like other usage in net. Detected by CoverityScan, CID# 1461639 Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Signed-off-by: Bo YU Acked-by: Paolo Abeni Signed-off-by: David S. Miller --- net/mptcp/pm_netlink.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net/mptcp') diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index 86d61ab34c7c..b78edf237ba0 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -599,12 +599,14 @@ static int mptcp_nl_fill_addr(struct sk_buff *skb, nla_put_s32(skb, MPTCP_PM_ADDR_ATTR_IF_IDX, entry->ifindex)) goto nla_put_failure; - if (addr->family == AF_INET) - nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, - addr->addr.s_addr); + if (addr->family == AF_INET && + nla_put_in_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR4, + addr->addr.s_addr)) + goto nla_put_failure; #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (addr->family == AF_INET6) - nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6); + else if (addr->family == AF_INET6 && + nla_put_in6_addr(skb, MPTCP_PM_ADDR_ATTR_ADDR6, &addr->addr6)) + goto nla_put_failure; #endif nla_nest_end(skb, attr); return 0; -- cgit