summaryrefslogtreecommitdiff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--net/mptcp/protocol.c298
1 files changed, 141 insertions, 157 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 08dc53f56bc2..a6c7f2d24909 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -44,7 +44,7 @@ enum {
static struct percpu_counter mptcp_sockets_allocated ____cacheline_aligned_in_smp;
static void __mptcp_destroy_sock(struct sock *sk);
-static void __mptcp_check_send_data_fin(struct sock *sk);
+static void mptcp_check_send_data_fin(struct sock *sk);
DEFINE_PER_CPU(struct mptcp_delegated_action, mptcp_delegated_actions);
static struct net_device mptcp_napi_dev;
@@ -90,8 +90,8 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
if (err)
return err;
- msk->first = ssock->sk;
- msk->subflow = ssock;
+ WRITE_ONCE(msk->first, ssock->sk);
+ WRITE_ONCE(msk->subflow, ssock);
subflow = mptcp_subflow_ctx(ssock->sk);
list_add(&subflow->node, &msk->conn_list);
sock_hold(ssock->sk);
@@ -424,8 +424,7 @@ static bool mptcp_pending_data_fin_ack(struct sock *sk)
{
struct mptcp_sock *msk = mptcp_sk(sk);
- return !__mptcp_check_fallback(msk) &&
- ((1 << sk->sk_state) &
+ return ((1 << sk->sk_state) &
(TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK)) &&
msk->write_seq == READ_ONCE(msk->snd_una);
}
@@ -583,9 +582,6 @@ static bool mptcp_check_data_fin(struct sock *sk)
u64 rcv_data_fin_seq;
bool ret = false;
- if (__mptcp_check_fallback(msk))
- return ret;
-
/* Need to ack a DATA_FIN received from a peer while this side
* of the connection is in ESTABLISHED, FIN_WAIT1, or FIN_WAIT2.
* msk->rcv_data_fin was set when parsing the incoming options
@@ -603,7 +599,7 @@ static bool mptcp_check_data_fin(struct sock *sk)
WRITE_ONCE(msk->ack_seq, msk->ack_seq + 1);
WRITE_ONCE(msk->rcv_data_fin, 0);
- sk->sk_shutdown |= RCV_SHUTDOWN;
+ WRITE_ONCE(sk->sk_shutdown, sk->sk_shutdown | RCV_SHUTDOWN);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
switch (sk->sk_state) {
@@ -623,7 +619,8 @@ static bool mptcp_check_data_fin(struct sock *sk)
}
ret = true;
- mptcp_send_ack(msk);
+ if (!__mptcp_check_fallback(msk))
+ mptcp_send_ack(msk);
mptcp_close_wake_up(sk);
}
return ret;
@@ -825,6 +822,13 @@ void mptcp_data_ready(struct sock *sk, struct sock *ssk)
mptcp_data_unlock(sk);
}
+static void mptcp_subflow_joined(struct mptcp_sock *msk, struct sock *ssk)
+{
+ mptcp_subflow_ctx(ssk)->map_seq = READ_ONCE(msk->ack_seq);
+ WRITE_ONCE(msk->allow_infinite_fallback, false);
+ mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
+}
+
static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
{
struct sock *sk = (struct sock *)msk;
@@ -839,15 +843,16 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_sock_graft(ssk, sk->sk_socket);
mptcp_sockopt_sync_locked(msk, ssk);
+ mptcp_subflow_joined(msk, ssk);
return true;
}
-static void __mptcp_flush_join_list(struct sock *sk)
+static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list)
{
struct mptcp_subflow_context *tmp, *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
- list_for_each_entry_safe(subflow, tmp, &msk->join_list, node) {
+ list_for_each_entry_safe(subflow, tmp, join_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast(ssk);
@@ -889,49 +894,6 @@ bool mptcp_schedule_work(struct sock *sk)
return false;
}
-void mptcp_subflow_eof(struct sock *sk)
-{
- if (!test_and_set_bit(MPTCP_WORK_EOF, &mptcp_sk(sk)->flags))
- mptcp_schedule_work(sk);
-}
-
-static void mptcp_check_for_eof(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- int receivers = 0;
-
- mptcp_for_each_subflow(msk, subflow)
- receivers += !subflow->rx_eof;
- if (receivers)
- return;
-
- if (!(sk->sk_shutdown & RCV_SHUTDOWN)) {
- /* hopefully temporary hack: propagate shutdown status
- * to msk, when all subflows agree on it
- */
- sk->sk_shutdown |= RCV_SHUTDOWN;
-
- smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
- sk->sk_data_ready(sk);
- }
-
- switch (sk->sk_state) {
- case TCP_ESTABLISHED:
- inet_sk_state_store(sk, TCP_CLOSE_WAIT);
- break;
- case TCP_FIN_WAIT1:
- inet_sk_state_store(sk, TCP_CLOSING);
- break;
- case TCP_FIN_WAIT2:
- inet_sk_state_store(sk, TCP_CLOSE);
- break;
- default:
- return;
- }
- mptcp_close_wake_up(sk);
-}
-
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
@@ -1601,7 +1563,7 @@ out:
if (!mptcp_timer_pending(sk))
mptcp_reset_timer(sk);
if (do_check_data_fin)
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_subflow_push_pending(struct sock *sk, struct sock *ssk, bool first)
@@ -1702,7 +1664,6 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
- msk->connect_flags = O_NONBLOCK;
msk->fastopening = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
msk->fastopening = 0;
@@ -1720,7 +1681,13 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
} else if (ret && ret != -EINPROGRESS) {
- mptcp_disconnect(sk, 0);
+ /* The disconnect() op called by tcp_sendmsg_fastopen()/
+ * __inet_stream_connect() can fail, due to looking check,
+ * see mptcp_disconnect().
+ * Attempt it again outside the problematic scope.
+ */
+ if (!mptcp_disconnect(sk, 0))
+ sk->sk_socket->state = SS_UNCONNECTED;
}
inet_sk(sk)->defer_connect = 0;
@@ -2151,9 +2118,6 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
break;
}
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
if (sk->sk_shutdown & RCV_SHUTDOWN) {
/* race breaker: the shutdown could be after the
* previous receive queue check
@@ -2283,7 +2247,7 @@ static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
{
if (msk->subflow) {
iput(SOCK_INODE(msk->subflow));
- msk->subflow = NULL;
+ WRITE_ONCE(msk->subflow, NULL);
}
}
@@ -2382,7 +2346,10 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk);
if (!dispose_it) {
- tcp_disconnect(ssk, 0);
+ /* The MPTCP code never wait on the subflow sockets, TCP-level
+ * disconnect should never fail
+ */
+ WARN_ON_ONCE(tcp_disconnect(ssk, 0));
msk->subflow->state = SS_UNCONNECTED;
mptcp_subflow_ctx_reset(subflow);
release_sock(ssk);
@@ -2401,13 +2368,6 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
- if (ssk->sk_state == TCP_LISTEN) {
- tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(sk, ssk);
- inet_csk_listen_stop(ssk);
- mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
- }
-
__tcp_close(ssk, 0);
/* close acquired an extra ref */
@@ -2420,7 +2380,7 @@ out_release:
sock_put(ssk);
if (ssk == msk->first)
- msk->first = NULL;
+ WRITE_ONCE(msk->first, NULL);
out:
if (ssk == msk->last_snd)
@@ -2527,7 +2487,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
}
inet_sk_state_store(sk, TCP_CLOSE);
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
smp_mb__before_atomic(); /* SHUTDOWN must be visible first */
set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags);
@@ -2664,16 +2624,12 @@ static void mptcp_worker(struct work_struct *work)
if (unlikely((1 << state) & (TCPF_CLOSE | TCPF_LISTEN)))
goto unlock;
- mptcp_check_data_fin_ack(sk);
-
mptcp_check_fastclose(msk);
mptcp_pm_nl_work(msk);
- if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
- mptcp_check_for_eof(msk);
-
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
+ mptcp_check_data_fin_ack(sk);
mptcp_check_data_fin(sk);
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
@@ -2721,7 +2677,7 @@ static int __mptcp_init_sock(struct sock *sk)
WRITE_ONCE(msk->rmem_released, 0);
msk->timer_ival = TCP_RTO_MIN;
- msk->first = NULL;
+ WRITE_ONCE(msk->first, NULL);
inet_csk(sk)->icsk_sync_mss = mptcp_sync_mss;
WRITE_ONCE(msk->csum_enabled, mptcp_is_checksum_enabled(sock_net(sk)));
WRITE_ONCE(msk->allow_infinite_fallback, true);
@@ -2805,13 +2761,19 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
break;
fallthrough;
case TCP_SYN_SENT:
- tcp_disconnect(ssk, O_NONBLOCK);
+ WARN_ON_ONCE(tcp_disconnect(ssk, O_NONBLOCK));
break;
default:
if (__mptcp_check_fallback(mptcp_sk(sk))) {
pr_debug("Fallback");
ssk->sk_shutdown |= how;
tcp_shutdown(ssk, how);
+
+ /* simulate the data_fin ack reception to let the state
+ * machine move forward
+ */
+ WRITE_ONCE(mptcp_sk(sk)->snd_una, mptcp_sk(sk)->snd_nxt);
+ mptcp_schedule_work(sk);
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
@@ -2851,7 +2813,7 @@ static int mptcp_close_state(struct sock *sk)
return next & TCP_ACTION_FIN;
}
-static void __mptcp_check_send_data_fin(struct sock *sk)
+static void mptcp_check_send_data_fin(struct sock *sk)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
@@ -2869,19 +2831,6 @@ static void __mptcp_check_send_data_fin(struct sock *sk)
WRITE_ONCE(msk->snd_nxt, msk->write_seq);
- /* fallback socket will not get data_fin/ack, can move to the next
- * state now
- */
- if (__mptcp_check_fallback(msk)) {
- WRITE_ONCE(msk->snd_una, msk->write_seq);
- if ((1 << sk->sk_state) & (TCPF_CLOSING | TCPF_LAST_ACK)) {
- inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_close_wake_up(sk);
- } else if (sk->sk_state == TCP_FIN_WAIT1) {
- inet_sk_state_store(sk, TCP_FIN_WAIT2);
- }
- }
-
mptcp_for_each_subflow(msk, subflow) {
struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow);
@@ -2901,7 +2850,7 @@ static void __mptcp_wr_shutdown(struct sock *sk)
WRITE_ONCE(msk->write_seq, msk->write_seq + 1);
WRITE_ONCE(msk->snd_data_fin_enable, 1);
- __mptcp_check_send_data_fin(sk);
+ mptcp_check_send_data_fin(sk);
}
static void __mptcp_destroy_sock(struct sock *sk)
@@ -2946,10 +2895,24 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
-static void mptcp_listen_inuse_dec(struct sock *sk)
+static void mptcp_check_listen_stop(struct sock *sk)
{
- if (inet_sk_state_load(sk) == TCP_LISTEN)
- sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ struct sock *ssk;
+
+ if (inet_sk_state_load(sk) != TCP_LISTEN)
+ return;
+
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+ ssk = mptcp_sk(sk)->first;
+ if (WARN_ON_ONCE(!ssk || inet_sk_state_load(ssk) != TCP_LISTEN))
+ return;
+
+ lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+ mptcp_subflow_queue_clean(sk, ssk);
+ inet_csk_listen_stop(ssk);
+ mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
+ tcp_set_state(ssk, TCP_CLOSE);
+ release_sock(ssk);
}
bool __mptcp_close(struct sock *sk, long timeout)
@@ -2959,10 +2922,10 @@ bool __mptcp_close(struct sock *sk, long timeout)
bool do_cancel_work = false;
int subflows_alive = 0;
- sk->sk_shutdown = SHUTDOWN_MASK;
+ WRITE_ONCE(sk->sk_shutdown, SHUTDOWN_MASK);
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -3039,7 +3002,7 @@ static void mptcp_close(struct sock *sk, long timeout)
sock_put(sk);
}
-void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
+static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk)
{
#if IS_ENABLED(CONFIG_MPTCP_IPV6)
const struct ipv6_pinfo *ssk6 = inet6_sk(ssk);
@@ -3066,15 +3029,20 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* Deny disconnect if other threads are blocked in sk_wait_event()
+ * or inet_wait_for_connect().
+ */
+ if (sk->sk_wait_pending)
+ return -EBUSY;
+
/* We are on the fastopen error path. We can't call straight into the
* subflows cleanup code due to lock nesting (we are already under
- * msk->firstsocket lock). Do nothing and leave the cleanup to the
- * caller.
+ * msk->firstsocket lock).
*/
if (msk->fastopening)
- return 0;
+ return -EBUSY;
- mptcp_listen_inuse_dec(sk);
+ mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3102,7 +3070,7 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_pm_data_reset(msk);
mptcp_ca_reset(sk);
- sk->sk_shutdown = 0;
+ WRITE_ONCE(sk->sk_shutdown, 0);
sk_error_report(sk);
return 0;
}
@@ -3116,9 +3084,10 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk)
}
#endif
-struct sock *mptcp_sk_clone(const struct sock *sk,
- const struct mptcp_options_received *mp_opt,
- struct request_sock *req)
+struct sock *mptcp_sk_clone_init(const struct sock *sk,
+ const struct mptcp_options_received *mp_opt,
+ struct sock *ssk,
+ struct request_sock *req)
{
struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req);
struct sock *nsk = sk_clone_lock(sk, GFP_ATOMIC);
@@ -3132,12 +3101,13 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk);
#endif
+ nsk->sk_wait_pending = 0;
__mptcp_init_sock(nsk);
msk = mptcp_sk(nsk);
msk->local_key = subflow_req->local_key;
msk->token = subflow_req->token;
- msk->subflow = NULL;
+ WRITE_ONCE(msk->subflow, NULL);
msk->in_accept_queue = 1;
WRITE_ONCE(msk->fully_established, false);
if (mp_opt->suboptions & OPTION_MPTCP_CSUMREQD)
@@ -3150,10 +3120,30 @@ struct sock *mptcp_sk_clone(const struct sock *sk,
msk->setsockopt_seq = mptcp_sk(sk)->setsockopt_seq;
sock_reset_flag(nsk, SOCK_RCU_FREE);
- /* will be fully established after successful MPC subflow creation */
- inet_sk_state_store(nsk, TCP_SYN_RECV);
-
security_inet_csk_clone(nsk, req);
+
+ /* this can't race with mptcp_close(), as the msk is
+ * not yet exposted to user-space
+ */
+ inet_sk_state_store(nsk, TCP_ESTABLISHED);
+
+ /* The msk maintain a ref to each subflow in the connections list */
+ WRITE_ONCE(msk->first, ssk);
+ list_add(&mptcp_subflow_ctx(ssk)->node, &msk->conn_list);
+ sock_hold(ssk);
+
+ /* new mpc subflow takes ownership of the newly
+ * created mptcp socket
+ */
+ mptcp_token_accept(subflow_req, msk);
+
+ /* set msk addresses early to ensure mptcp_pm_get_local_id()
+ * uses the correct data
+ */
+ mptcp_copy_inaddrs(nsk, ssk);
+ mptcp_propagate_sndbuf(nsk, ssk);
+
+ mptcp_rcv_space_init(msk, ssk);
bh_unlock_sock(nsk);
/* note: the newly allocated socket refcount is 2 now */
@@ -3185,7 +3175,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct socket *listener;
struct sock *newsk;
- listener = msk->subflow;
+ listener = READ_ONCE(msk->subflow);
if (WARN_ON_ONCE(!listener)) {
*err = -EINVAL;
return NULL;
@@ -3299,9 +3289,14 @@ static void mptcp_release_cb(struct sock *sk)
for (;;) {
unsigned long flags = (msk->cb_flags & MPTCP_FLAGS_PROCESS_CTX_NEED) |
msk->push_pending;
+ struct list_head join_list;
+
if (!flags)
break;
+ INIT_LIST_HEAD(&join_list);
+ list_splice_init(&msk->join_list, &join_list);
+
/* the following actions acquire the subflow socket lock
*
* 1) can't be invoked in atomic scope
@@ -3312,8 +3307,9 @@ static void mptcp_release_cb(struct sock *sk)
msk->push_pending = 0;
msk->cb_flags &= ~flags;
spin_unlock_bh(&sk->sk_lock.slock);
+
if (flags & BIT(MPTCP_FLUSH_JOIN_LIST))
- __mptcp_flush_join_list(sk);
+ __mptcp_flush_join_list(sk, &join_list);
if (flags & BIT(MPTCP_PUSH_PENDING))
__mptcp_push_pending(sk, 0);
if (flags & BIT(MPTCP_RETRANSMIT))
@@ -3465,14 +3461,16 @@ bool mptcp_finish_join(struct sock *ssk)
return false;
}
- if (!list_empty(&subflow->node))
- goto out;
+ /* active subflow, already present inside the conn_list */
+ if (!list_empty(&subflow->node)) {
+ mptcp_subflow_joined(msk, ssk);
+ return true;
+ }
if (!mptcp_pm_allow_new_subflow(msk))
goto err_prohibited;
- /* active connections are already on conn_list.
- * If we can't acquire msk socket lock here, let the release callback
+ /* If we can't acquire msk socket lock here, let the release callback
* handle it
*/
mptcp_data_lock(parent);
@@ -3495,11 +3493,6 @@ err_prohibited:
return false;
}
- subflow->map_seq = READ_ONCE(msk->ack_seq);
- WRITE_ONCE(msk->allow_infinite_fallback, false);
-
-out:
- mptcp_event(MPTCP_EVENT_SUB_ESTABLISHED, msk, ssk, GFP_ATOMIC);
return true;
}
@@ -3617,9 +3610,9 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
* acquired the subflow socket lock, too.
*/
if (msk->fastopening)
- err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
+ err = __inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK, 1);
else
- err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
+ err = inet_stream_connect(ssock, uaddr, addr_len, O_NONBLOCK);
inet_sk(sk)->defer_connect = inet_sk(ssock->sk)->defer_connect;
/* on successful connect, the msk state will be moved to established by
@@ -3632,12 +3625,10 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
mptcp_copy_inaddrs(sk, ssock->sk);
- /* unblocking connect, mptcp-level inet_stream_connect will error out
- * without changing the socket state, update it here.
+ /* silence EINPROGRESS and let the caller inet_stream_connect
+ * handle the connection in progress
*/
- if (err == -EINPROGRESS)
- sk->sk_socket->state = ssock->state;
- return err;
+ return 0;
}
static struct proto mptcp_prot = {
@@ -3696,18 +3687,6 @@ unlock:
return err;
}
-static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
- int addr_len, int flags)
-{
- int ret;
-
- lock_sock(sock->sk);
- mptcp_sk(sock->sk)->connect_flags = flags;
- ret = __inet_stream_connect(sock, uaddr, addr_len, flags, 0);
- release_sock(sock->sk);
- return ret;
-}
-
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
@@ -3751,10 +3730,10 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
pr_debug("msk=%p", msk);
- /* buggy applications can call accept on socket states other then LISTEN
+ /* Buggy applications can call accept on socket states other then LISTEN
* but no need to allocate the first subflow just to error out.
*/
- ssock = msk->subflow;
+ ssock = READ_ONCE(msk->subflow);
if (!ssock)
return -EINVAL;
@@ -3800,9 +3779,6 @@ static __poll_t mptcp_check_writeable(struct mptcp_sock *msk)
{
struct sock *sk = (struct sock *)msk;
- if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
- return EPOLLOUT | EPOLLWRNORM;
-
if (sk_stream_is_writeable(sk))
return EPOLLOUT | EPOLLWRNORM;
@@ -3820,6 +3796,7 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
struct sock *sk = sock->sk;
struct mptcp_sock *msk;
__poll_t mask = 0;
+ u8 shutdown;
int state;
msk = mptcp_sk(sk);
@@ -3828,23 +3805,30 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
state = inet_sk_state_load(sk);
pr_debug("msk=%p state=%d flags=%lx", msk, state, msk->flags);
if (state == TCP_LISTEN) {
- if (WARN_ON_ONCE(!msk->subflow || !msk->subflow->sk))
+ struct socket *ssock = READ_ONCE(msk->subflow);
+
+ if (WARN_ON_ONCE(!ssock || !ssock->sk))
return 0;
- return inet_csk_listen_poll(msk->subflow->sk);
+ return inet_csk_listen_poll(ssock->sk);
}
+ shutdown = READ_ONCE(sk->sk_shutdown);
+ if (shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
+ mask |= EPOLLHUP;
+ if (shutdown & RCV_SHUTDOWN)
+ mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
+
if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) {
mask |= mptcp_check_readable(msk);
- mask |= mptcp_check_writeable(msk);
+ if (shutdown & SEND_SHUTDOWN)
+ mask |= EPOLLOUT | EPOLLWRNORM;
+ else
+ mask |= mptcp_check_writeable(msk);
} else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
/* cf tcp_poll() note about TFO */
mask |= EPOLLOUT | EPOLLWRNORM;
}
- if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE)
- mask |= EPOLLHUP;
- if (sk->sk_shutdown & RCV_SHUTDOWN)
- mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP;
/* This barrier is coupled with smp_wmb() in __mptcp_error_report() */
smp_rmb();
@@ -3859,7 +3843,7 @@ static const struct proto_ops mptcp_stream_ops = {
.owner = THIS_MODULE,
.release = inet_release,
.bind = mptcp_bind,
- .connect = mptcp_stream_connect,
+ .connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
.getname = inet_getname,
@@ -3954,7 +3938,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
.owner = THIS_MODULE,
.release = inet6_release,
.bind = mptcp_bind,
- .connect = mptcp_stream_connect,
+ .connect = inet_stream_connect,
.socketpair = sock_no_socketpair,
.accept = mptcp_stream_accept,
.getname = inet6_getname,