summary | refs | log | tree | commit | diff
path: root/net/mptcp/protocol.c
diff options
context:
space:
mode:
author: Mat Martineau <mathew.j.martineau@linux.intel.com> 2020-01-21 16:56:23 -0800
committer: David S. Miller <davem@davemloft.net> 2020-01-24 13:44:07 +0100
commit: 6d0060f600adfddaa43fefb96b6b12643331961e (patch)
tree: e619ccf0b4df57bc1958c4bcc0c142b85446aba7 /net/mptcp/protocol.c
parent: 717e79c867ca5f27815578815feafa3f3944f06b (diff)
mptcp: Write MPTCP DSS headers to outgoing data packets
Per-packet metadata required to write the MPTCP DSS option is written to the skb_ext area. One write to the socket may contain more than one packet of data, which is copied to page fragments and mapped into MPTCP DSS segments with size determined by the available page fragments and the maximum mapping length allowed by the MPTCP specification. If do_tcp_sendpages() splits a DSS segment into multiple skbs, that's ok - the later skbs can either have duplicated DSS mapping information or none at all, and the receiver can handle that.

The current implementation uses the subflow frag cache and tcp sendpages to avoid excessive code duplication. More work is required to ensure that it works correctly under memory pressure and to support MPTCP-level retransmissions.

The MPTCP DSS checksum is not yet implemented.

Co-developed-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Co-developed-by: Peter Krystad <peter.krystad@linux.intel.com>
Signed-off-by: Peter Krystad <peter.krystad@linux.intel.com>
Co-developed-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/mptcp/protocol.c')
-rw-r--r-- net/mptcp/protocol.c | 116
1 files changed, 114 insertions, 2 deletions
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 875ca48de4b2..8cf49193b1c0 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -97,12 +97,93 @@ static struct sock *mptcp_subflow_get(const struct mptcp_sock *msk)
return NULL;
}
+static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
+{
+ if (!msk->cached_ext)
+ msk->cached_ext = __skb_ext_alloc();
+
+ return !!msk->cached_ext;
+}
+
+static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
+ struct msghdr *msg, long *timeo)
+{
+ int mss_now = 0, size_goal = 0, ret = 0;
+ struct mptcp_sock *msk = mptcp_sk(sk);
+ struct mptcp_ext *mpext = NULL;
+ struct page_frag *pfrag;
+ struct sk_buff *skb;
+ size_t psize;
+
+ /* use the mptcp page cache so that we can easily move the data
+ * from one substream to another, but do per subflow memory accounting
+ */
+ pfrag = sk_page_frag(sk);
+ while (!sk_page_frag_refill(ssk, pfrag) ||
+ !mptcp_ext_cache_refill(msk)) {
+ ret = sk_stream_wait_memory(ssk, timeo);
+ if (ret)
+ return ret;
+ }
+
+ /* compute copy limit */
+ mss_now = tcp_send_mss(ssk, &size_goal, msg->msg_flags);
+ psize = min_t(int, pfrag->size - pfrag->offset, size_goal);
+
+ pr_debug("left=%zu", msg_data_left(msg));
+ psize = copy_page_from_iter(pfrag->page, pfrag->offset,
+ min_t(size_t, msg_data_left(msg), psize),
+ &msg->msg_iter);
+ pr_debug("left=%zu", msg_data_left(msg));
+ if (!psize)
+ return -EINVAL;
+
+ /* Mark the end of the previous write so the beginning of the
+ * next write (with its own mptcp skb extension data) is not
+ * collapsed.
+ */
+ skb = tcp_write_queue_tail(ssk);
+ if (skb)
+ TCP_SKB_CB(skb)->eor = 1;
+
+ ret = do_tcp_sendpages(ssk, pfrag->page, pfrag->offset, psize,
+ msg->msg_flags | MSG_SENDPAGE_NOTLAST);
+ if (ret <= 0)
+ return ret;
+ if (unlikely(ret < psize))
+ iov_iter_revert(&msg->msg_iter, psize - ret);
+
+ skb = tcp_write_queue_tail(ssk);
+ mpext = __skb_ext_set(skb, SKB_EXT_MPTCP, msk->cached_ext);
+ msk->cached_ext = NULL;
+
+ memset(mpext, 0, sizeof(*mpext));
+ mpext->data_seq = msk->write_seq;
+ mpext->subflow_seq = mptcp_subflow_ctx(ssk)->rel_write_seq;
+ mpext->data_len = ret;
+ mpext->use_map = 1;
+ mpext->dsn64 = 1;
+
+ pr_debug("data_seq=%llu subflow_seq=%u data_len=%u dsn64=%d",
+ mpext->data_seq, mpext->subflow_seq, mpext->data_len,
+ mpext->dsn64);
+
+ pfrag->offset += ret;
+ msk->write_seq += ret;
+ mptcp_subflow_ctx(ssk)->rel_write_seq += ret;
+
+ tcp_push(ssk, msg->msg_flags, mss_now, tcp_sk(ssk)->nonagle, size_goal);
+ return ret;
+}
+
static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
{
struct mptcp_sock *msk = mptcp_sk(sk);
struct socket *ssock;
+ size_t copied = 0;
struct sock *ssk;
- int ret;
+ int ret = 0;
+ long timeo;
if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
return -EOPNOTSUPP;
@@ -116,14 +197,29 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
return ret;
}
+ timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+
ssk = mptcp_subflow_get(msk);
if (!ssk) {
release_sock(sk);
return -ENOTCONN;
}
- ret = sock_sendmsg(ssk->sk_socket, msg);
+ pr_debug("conn_list->subflow=%p", ssk);
+ lock_sock(ssk);
+ while (msg_data_left(msg)) {
+ ret = mptcp_sendmsg_frag(sk, ssk, msg, &timeo);
+ if (ret < 0)
+ break;
+
+ copied += ret;
+ }
+
+ if (copied > 0)
+ ret = copied;
+
+ release_sock(ssk);
release_sock(sk);
return ret;
}
@@ -235,6 +331,8 @@ static void mptcp_close(struct sock *sk, long timeout)
__mptcp_close_ssk(sk, ssk, subflow, timeout);
}
+ if (msk->cached_ext)
+ __skb_ext_put(msk->cached_ext);
release_sock(sk);
sk_common_release(sk);
}
@@ -286,6 +384,7 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
struct mptcp_subflow_context *subflow;
struct sock *new_mptcp_sock;
struct sock *ssk = newsk;
+ u64 ack_seq;
subflow = mptcp_subflow_ctx(newsk);
lock_sock(sk);
@@ -310,6 +409,12 @@ static struct sock *mptcp_accept(struct sock *sk, int flags, int *err,
msk->subflow = NULL;
mptcp_token_update_accept(newsk, new_mptcp_sock);
+
+ mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq);
+ msk->write_seq = subflow->idsn + 1;
+ ack_seq++;
+ msk->ack_seq = ack_seq;
+ subflow->rel_write_seq = 1;
newsk = new_mptcp_sock;
mptcp_copy_inaddrs(newsk, ssk);
list_add(&subflow->node, &msk->conn_list);
@@ -404,6 +509,7 @@ void mptcp_finish_connect(struct sock *ssk)
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk;
struct sock *sk;
+ u64 ack_seq;
subflow = mptcp_subflow_ctx(ssk);
@@ -413,12 +519,18 @@ void mptcp_finish_connect(struct sock *ssk)
sk = subflow->conn;
msk = mptcp_sk(sk);
+ mptcp_crypto_key_sha(subflow->remote_key, NULL, &ack_seq);
+ ack_seq++;
+ subflow->rel_write_seq = 1;
+
/* the socket is not connected yet, no msk/subflow ops can access/race
* accessing the field below
*/
WRITE_ONCE(msk->remote_key, subflow->remote_key);
WRITE_ONCE(msk->local_key, subflow->local_key);
WRITE_ONCE(msk->token, subflow->token);
+ WRITE_ONCE(msk->write_seq, subflow->idsn + 1);
+ WRITE_ONCE(msk->ack_seq, ack_seq);
}
static void mptcp_sock_graft(struct sock *sk, struct socket *parent)