summaryrefslogtreecommitdiff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r--net/mptcp/protocol.c193
1 files changed, 116 insertions, 77 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a7fc16f5175d..c3b83cb390d9 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -405,7 +405,7 @@ drop:
return false;
}
-static void mptcp_stop_timer(struct sock *sk)
+static void mptcp_stop_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -770,6 +770,46 @@ static bool __mptcp_ofo_queue(struct mptcp_sock *msk)
return moved;
}
+static bool __mptcp_subflow_error_report(struct sock *sk, struct sock *ssk)
+{
+ int err = sock_error(ssk);
+ int ssk_state;
+
+ if (!err)
+ return false;
+
+ /* only propagate errors on fallen-back sockets or
+ * on MPC connect
+ */
+ if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(mptcp_sk(sk)))
+ return false;
+
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
+ WRITE_ONCE(sk->sk_err, -err);
+
+ /* This barrier is coupled with smp_rmb() in mptcp_poll() */
+ smp_wmb();
+ sk_error_report(sk);
+ return true;
+}
+
+void __mptcp_error_report(struct sock *sk)
+{
+ struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+
+ mptcp_for_each_subflow(msk, subflow)
+ if (__mptcp_subflow_error_report(sk, mptcp_subflow_tcp_sock(subflow)))
+ break;
+}
+
/* In most cases we will be able to lock the mptcp socket. If its already
* owned, we need to defer to the work queue to avoid ABBA deadlock.
*/
@@ -852,6 +892,7 @@ static bool __mptcp_finish_join(struct mptcp_sock *msk, struct sock *ssk)
mptcp_subflow_ctx(ssk)->subflow_id = msk->subflow_id++;
mptcp_sockopt_sync_locked(msk, ssk);
mptcp_subflow_joined(msk, ssk);
+ mptcp_stop_tout_timer(sk);
return true;
}
@@ -871,12 +912,12 @@ static void __mptcp_flush_join_list(struct sock *sk, struct list_head *join_list
}
}
-static bool mptcp_timer_pending(struct sock *sk)
+static bool mptcp_rtx_timer_pending(struct sock *sk)
{
return timer_pending(&inet_csk(sk)->icsk_retransmit_timer);
}
-static void mptcp_reset_timer(struct sock *sk)
+static void mptcp_reset_rtx_timer(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned long tout;
@@ -1010,10 +1051,10 @@ static void __mptcp_clean_una(struct sock *sk)
out:
if (snd_una == READ_ONCE(msk->snd_nxt) &&
snd_una == READ_ONCE(msk->write_seq)) {
- if (mptcp_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
- mptcp_stop_timer(sk);
+ if (mptcp_rtx_timer_pending(sk) && !mptcp_data_fin_enabled(msk))
+ mptcp_stop_rtx_timer(sk);
} else {
- mptcp_reset_timer(sk);
+ mptcp_reset_rtx_timer(sk);
}
}
@@ -1586,8 +1627,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags)
mptcp_push_release(ssk, &info);
/* ensure the rtx timer is running */
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (do_check_data_fin)
mptcp_check_send_data_fin(sk);
}
@@ -1650,8 +1691,8 @@ out:
if (copied) {
tcp_push(ssk, 0, info.mss_now, tcp_sk(ssk)->nonagle,
info.size_goal);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
if (msk->snd_data_fin_enable &&
msk->snd_nxt + 1 == msk->write_seq)
@@ -2220,7 +2261,7 @@ static void mptcp_retransmit_timer(struct timer_list *t)
sock_put(sk);
}
-static void mptcp_timeout_timer(struct timer_list *t)
+static void mptcp_tout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
@@ -2329,18 +2370,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
bool dispose_it, need_push = false;
/* If the first subflow moved to a close state before accept, e.g. due
- * to an incoming reset, mptcp either:
- * - if either the subflow or the msk are dead, destroy the context
- * (the subflow socket is deleted by inet_child_forget) and the msk
- * - otherwise do nothing at the moment and take action at accept and/or
- * listener shutdown - user-space must be able to accept() the closed
- * socket.
+ * to an incoming reset or listener shutdown, the subflow socket is
+ * already deleted by inet_child_forget() and the mptcp socket can't
+ * survive too.
*/
- if (msk->in_accept_queue && msk->first == ssk) {
- if (!sock_flag(sk, SOCK_DEAD) && !sock_flag(ssk, SOCK_DEAD))
- return;
-
+ if (msk->in_accept_queue && msk->first == ssk &&
+ (sock_flag(sk, SOCK_DEAD) || sock_flag(ssk, SOCK_DEAD))) {
/* ensure later check in mptcp_worker() will dispose the msk */
+ mptcp_set_close_tout(sk, tcp_jiffies32 - (TCP_TIMEWAIT_LEN + 1));
sock_set_flag(sk, SOCK_DEAD);
lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
mptcp_subflow_drop_ctx(ssk);
@@ -2392,6 +2429,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
}
out_release:
+ __mptcp_subflow_error_report(sk, ssk);
release_sock(ssk);
sock_put(ssk);
@@ -2402,6 +2440,22 @@ out_release:
out:
if (need_push)
__mptcp_push_pending(sk, 0);
+
+ /* Catch every 'all subflows closed' scenario, including peers silently
+ * closing them, e.g. due to timeout.
+ * For established sockets, allow an additional timeout before closing,
+ * as the protocol can still create more subflows.
+ */
+ if (list_is_singular(&msk->conn_list) && msk->first &&
+ inet_sk_state_load(msk->first) == TCP_CLOSE) {
+ if (sk->sk_state != TCP_ESTABLISHED ||
+ msk->in_accept_queue || sock_flag(sk, SOCK_DEAD)) {
+ inet_sk_state_store(sk, TCP_CLOSE);
+ mptcp_close_wake_up(sk);
+ } else {
+ mptcp_start_tout_timer(sk);
+ }
+ }
}
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2445,23 +2499,14 @@ static void __mptcp_close_subflow(struct sock *sk)
}
-static bool mptcp_should_close(const struct sock *sk)
+static bool mptcp_close_tout_expired(const struct sock *sk)
{
- s32 delta = tcp_jiffies32 - inet_csk(sk)->icsk_mtup.probe_timestamp;
- struct mptcp_subflow_context *subflow;
-
- if (delta >= TCP_TIMEWAIT_LEN || mptcp_sk(sk)->in_accept_queue)
- return true;
+ if (!inet_csk(sk)->icsk_mtup.probe_timestamp ||
+ sk->sk_state == TCP_CLOSE)
+ return false;
- /* if all subflows are in closed status don't bother with additional
- * timeout
- */
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
- if (inet_sk_state_load(mptcp_subflow_tcp_sock(subflow)) !=
- TCP_CLOSE)
- return false;
- }
- return true;
+ return time_after32(tcp_jiffies32,
+ inet_csk(sk)->icsk_mtup.probe_timestamp + TCP_TIMEWAIT_LEN);
}
static void mptcp_check_fastclose(struct mptcp_sock *msk)
@@ -2588,27 +2633,28 @@ static void __mptcp_retrans(struct sock *sk)
reset_timer:
mptcp_check_and_set_pending(sk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
/* schedule the timeout timer for the relevant event: either close timeout
* or mp_fail timeout. The close timeout takes precedence on the mp_fail one
*/
-void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+void mptcp_reset_tout_timer(struct mptcp_sock *msk, unsigned long fail_tout)
{
struct sock *sk = (struct sock *)msk;
unsigned long timeout, close_timeout;
- if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ if (!fail_tout && !inet_csk(sk)->icsk_mtup.probe_timestamp)
return;
- close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies +
+ TCP_TIMEWAIT_LEN;
/* the close timeout takes precedence on the fail one, and here at least one of
* them is active
*/
- timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+ timeout = inet_csk(sk)->icsk_mtup.probe_timestamp ? close_timeout : fail_tout;
sk_reset_timer(sk, &sk->sk_timer, timeout);
}
@@ -2627,8 +2673,6 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
mptcp_subflow_reset(ssk);
WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
unlock_sock_fast(ssk, slow);
-
- mptcp_reset_timeout(msk, 0);
}
static void mptcp_do_fastclose(struct sock *sk)
@@ -2665,18 +2709,14 @@ static void mptcp_worker(struct work_struct *work)
if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
__mptcp_close_subflow(sk);
- /* There is no point in keeping around an orphaned sk timedout or
- * closed, but we need the msk around to reply to incoming DATA_FIN,
- * even if it is orphaned and in FIN_WAIT2 state
- */
- if (sock_flag(sk, SOCK_DEAD)) {
- if (mptcp_should_close(sk))
- mptcp_do_fastclose(sk);
+ if (mptcp_close_tout_expired(sk)) {
+ mptcp_do_fastclose(sk);
+ mptcp_close_wake_up(sk);
+ }
- if (sk->sk_state == TCP_CLOSE) {
- __mptcp_destroy_sock(sk);
- goto unlock;
- }
+ if (sock_flag(sk, SOCK_DEAD) && sk->sk_state == TCP_CLOSE) {
+ __mptcp_destroy_sock(sk);
+ goto unlock;
}
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
@@ -2717,7 +2757,7 @@ static void __mptcp_init_sock(struct sock *sk)
/* re-use the csk retrans timer for MPTCP-level retrans */
timer_setup(&msk->sk.icsk_retransmit_timer, mptcp_retransmit_timer, 0);
- timer_setup(&sk->sk_timer, mptcp_timeout_timer, 0);
+ timer_setup(&sk->sk_timer, mptcp_tout_timer, 0);
}
static void mptcp_ca_reset(struct sock *sk)
@@ -2808,8 +2848,8 @@ void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how)
} else {
pr_debug("Sending DATA_FIN on subflow %p", ssk);
tcp_send_ack(ssk);
- if (!mptcp_timer_pending(sk))
- mptcp_reset_timer(sk);
+ if (!mptcp_rtx_timer_pending(sk))
+ mptcp_reset_rtx_timer(sk);
}
break;
}
@@ -2892,7 +2932,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
might_sleep();
- mptcp_stop_timer(sk);
+ mptcp_stop_rtx_timer(sk);
sk_stop_timer(sk, &sk->sk_timer);
msk->pm.status = 0;
mptcp_release_sched(msk);
@@ -2975,7 +3015,6 @@ bool __mptcp_close(struct sock *sk, long timeout)
cleanup:
/* orphan all the subflows */
- inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
@@ -3012,7 +3051,7 @@ cleanup:
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- mptcp_reset_timeout(msk, 0);
+ mptcp_start_tout_timer(sk);
}
return do_cancel_work;
@@ -3075,8 +3114,8 @@ static int mptcp_disconnect(struct sock *sk, int flags)
mptcp_check_listen_stop(sk);
inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_stop_timer(sk);
- sk_stop_timer(sk, &sk->sk_timer);
+ mptcp_stop_rtx_timer(sk);
+ mptcp_stop_tout_timer(sk);
if (msk->token)
mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
@@ -3386,24 +3425,21 @@ static void schedule_3rdack_retransmission(struct sock *ssk)
sk_reset_timer(ssk, &icsk->icsk_delack_timer, timeout);
}
-void mptcp_subflow_process_delegated(struct sock *ssk)
+void mptcp_subflow_process_delegated(struct sock *ssk, long status)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct sock *sk = subflow->conn;
- if (test_bit(MPTCP_DELEGATE_SEND, &subflow->delegated_status)) {
+ if (status & BIT(MPTCP_DELEGATE_SEND)) {
mptcp_data_lock(sk);
if (!sock_owned_by_user(sk))
__mptcp_subflow_push_pending(sk, ssk, true);
else
__set_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->cb_flags);
mptcp_data_unlock(sk);
- mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_SEND);
}
- if (test_bit(MPTCP_DELEGATE_ACK, &subflow->delegated_status)) {
+ if (status & BIT(MPTCP_DELEGATE_ACK))
schedule_3rdack_retransmission(ssk);
- mptcp_subflow_delegated_done(subflow, MPTCP_DELEGATE_ACK);
- }
}
static int mptcp_hash(struct sock *sk)
@@ -3929,14 +3965,17 @@ static int mptcp_napi_poll(struct napi_struct *napi, int budget)
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bh_lock_sock_nested(ssk);
- if (!sock_owned_by_user(ssk) &&
- mptcp_subflow_has_delegated_action(subflow))
- mptcp_subflow_process_delegated(ssk);
- /* ... elsewhere tcp_release_cb_override already processed
- * the action or will do at next release_sock().
- * In both case must dequeue the subflow here - on the same
- * CPU that scheduled it.
- */
+ if (!sock_owned_by_user(ssk)) {
+ mptcp_subflow_process_delegated(ssk, xchg(&subflow->delegated_status, 0));
+ } else {
+ /* tcp_release_cb_override already processed
+ * the action or will do at next release_sock().
+ * In both case must dequeue the subflow here - on the same
+ * CPU that scheduled it.
+ */
+ smp_wmb();
+ clear_bit(MPTCP_DELEGATE_SCHEDULED, &subflow->delegated_status);
+ }
bh_unlock_sock(ssk);
sock_put(ssk);