From 90017accff61ae89283ad9a51f9ac46ca01633fb Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Thu, 2 Jun 2016 15:05:43 -0300 Subject: sctp: Add GSO support SCTP has this peculiarity that its packets cannot be just segmented to (P)MTU. Its chunks must be contained in IP segments, padding respected. So we can't just generate a big skb, set gso_size to the fragmentation point and deliver it to IP layer. This patch takes a different approach. SCTP will now build a skb as it would be if it was received using GRO. That is, there will be a cover skb with protocol headers and children ones containing the actual segments, already segmented in a way that respects SCTP RFCs. With that, we can tell skb_segment() to just split based on frag_list, trusting its sizes are already in accordance. This way SCTP can benefit from GSO and instead of passing several packets through the stack, it can pass a single large packet. v2: - Added support for receiving GSO frames, as requested by Dave Miller. - Clear skb->cb if packet is GSO (otherwise it's not used by SCTP) - Added heuristics similar to what we have in TCP for not generating single GSO packets that fill cwnd. v3: - consider sctphdr size in skb_gso_transport_seglen() - rebased due to 5c7cdf339af5 ("gso: Remove arbitrary checks for unsupported GSO") Signed-off-by: Marcelo Ricardo Leitner Tested-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/sctp.h | 4 ++++ include/net/sctp/structs.h | 5 +++++ 2 files changed, 9 insertions(+) (limited to 'include/net/sctp') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index b392ac8382f2..632e205ca54b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -186,6 +186,10 @@ void sctp_assocs_proc_exit(struct net *net); int sctp_remaddr_proc_init(struct net *net); void sctp_remaddr_proc_exit(struct net *net); +/* + * sctp/offload.c + */ +int sctp_offload_init(void); /* * Module global variables diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 16b013a6191c..83c5ec58b93a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -566,6 +566,9 @@ struct sctp_chunk { /* This points to the sk_buff containing the actual data. */ struct sk_buff *skb; + /* In case of GSO packets, this will store the head one */ + struct sk_buff *head_skb; + /* These are the SCTP headers by reverse order in a packet. * Note that some of these may happen more than once. In that * case, we point at the "current" one, whatever that means @@ -696,6 +699,8 @@ struct sctp_packet { size_t overhead; /* This is the total size of all chunks INCLUDING padding. */ size_t size; + /* This is the maximum size this packet may have */ + size_t max_size; /* The packet is destined for this transport address. * The function we finally use to pass down to the next lower -- cgit From 28aa4c26fce2202db8d42ae76b639ca1d9a23d25 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:40 +0800 Subject: sctp: add SCTP_PR_SUPPORTED on sctp sockopt According to section 4.5 of rfc7496, prsctp_enable should be per asoc. We will add prsctp_enable to both asoc and ep, and replace the places where it used net.sctp->prsctp_enable with asoc->prsctp_enable. ep->prsctp_enable will be initialized with net.sctp->prsctp_enable, and asoc->prsctp_enable will be initialized with ep->prsctp_enable. 
We can also modify its value through sockopt SCTP_PR_SUPPORTED. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 83c5ec58b93a..07115ca9de4d 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1256,7 +1256,8 @@ struct sctp_endpoint { /* SCTP-AUTH: endpoint shared keys */ struct list_head endpoint_shared_keys; __u16 active_key_id; - __u8 auth_enable; + __u8 auth_enable:1, + prsctp_enable:1; }; /* Recover the outter endpoint structure. */ @@ -1848,7 +1849,8 @@ struct sctp_association { __u16 active_key_id; __u8 need_ecne:1, /* Need to send an ECNE Chunk? */ - temp:1; /* Is it a temporary association? */ + temp:1, /* Is it a temporary association? */ + prsctp_enable:1; struct sctp_priv_assoc_stats stats; }; -- cgit From 826d253d57b11f69add81c8086d2e7f1dce5ec77 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:42 +0800 Subject: sctp: add SCTP_PR_ASSOC_STATUS on sctp sockopt This patch adds SCTP_PR_ASSOC_STATUS to sctp sockopt, which is used to dump the prsctp statistics info from the asoc. The prsctp statistics include abandoned_sent/unsent from the asoc. abandoned_sent is the count of the packets we drop from the retransmit/transmitted queue, and abandoned_unsent is the count of the packets we drop from out_queue according to the policy. Note: another option for prsctp statistics dump described in rfc is SCTP_PR_STREAM_STATUS, which is used to dump the prsctp statistics info from each stream. But by now, linux doesn't yet have per stream statistics info, it needs rfc6525 to be implemented. As the prsctp statistics for each stream has to be based on per stream statistics, we will delay it until rfc6525 is done in linux. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 07115ca9de4d..d8e464aacb20 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1853,6 +1853,9 @@ struct sctp_association { prsctp_enable:1; struct sctp_priv_assoc_stats stats; + + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; + __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; -- cgit From a6c2f792873aff332a4689717c3cd6104f46684c Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:43 +0800 Subject: sctp: implement prsctp TTL policy prsctp TTL policy is a policy to abandon chunks when they expire at the specific time in local stack. It's similar to expires_at in struct sctp_datamsg. This patch uses sinfo->sinfo_timetolive to set the specific time for TTL policy. sinfo->sinfo_timetolive is also used for msg->expires_at. So if prsctp_enable or TTL policy is not enabled, msg->expires_at still works as before. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index d8e464aacb20..6bcda715008e 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -602,6 +602,16 @@ struct sctp_chunk { /* This needs to be recoverable for SCTP_SEND_FAILED events. */ struct sctp_sndrcvinfo sinfo; + /* We use this field to record param for prsctp policies, + * for TTL policy, it is the time_to_drop of this chunk, + * for RTX policy, it is the max_sent_count of this chunk, + * for PRIO policy, it is the priority of this chunk. + */ + unsigned long prsctp_param; + + /* How many times this chunk have been sent, for prsctp RTX policy */ + int sent_count; + /* Which association does this belong to? 
*/ struct sctp_association *asoc; -- cgit From 8dbdf1f5b09cb22560e7c7173b52fe3c631046bd Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sat, 9 Jul 2016 19:47:45 +0800 Subject: sctp: implement prsctp PRIO policy prsctp PRIO policy is a policy to abandon lower priority chunks when asoc doesn't have enough snd buffer, so that the current chunk with higher priority can be queued successfully. Similar to TTL/RTX policy, we will set the priority of the chunk to prsctp_param with sinfo->sinfo_timetolive in sctp_set_prsctp_policy(). So if PRIO policy is enabled, msg->expires_at won't work. asoc->sent_cnt_removable will record how many chunks can be checked to remove. If priority policy is enabled, when the chunk is queued into the out_queue, we will increase sent_cnt_removable. When the chunk is moved to abandon_queue or dequeued and freed, we will decrease sent_cnt_removable. In sctp_sendmsg, we will check if there is enough snd buffer for current msg and if sent_cnt_removable is not 0. Then try to abandon chunks in sctp_prsctp_prune when sendmsg from the retransmit/transmitted queue, and free chunks from out_queue in right order until the abandon+free size > msg_len - sctp_wfree. For the abandon size, we have to wait until it sends FORWARD TSN, receives the sack and the chunks are really freed. Signed-off-by: Xin Long Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6bcda715008e..8626bdd3249a 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1084,6 +1084,8 @@ void sctp_retransmit(struct sctp_outq *, struct sctp_transport *, sctp_retransmit_reason_t); void sctp_retransmit_mark(struct sctp_outq *, struct sctp_transport *, __u8); int sctp_outq_uncork(struct sctp_outq *, gfp_t gfp); +void sctp_prsctp_prune(struct sctp_association *asoc, + struct sctp_sndrcvinfo *sinfo, int msg_len); /* Uncork and flush an outqueue. */ static inline void sctp_outq_cork(struct sctp_outq *q) { @@ -1864,6 +1866,8 @@ struct sctp_association { struct sctp_priv_assoc_stats stats; + int sent_cnt_removable; + __u64 abandoned_unsent[SCTP_PR_INDEX(MAX) + 1]; __u64 abandoned_sent[SCTP_PR_INDEX(MAX) + 1]; }; -- cgit From 9e238323799fb8c2add2b1de9a22edd4d4e51e30 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:55 -0300 Subject: sctp: allow others to use sctp_input_cb We process input path in other files too and having access to it is nice, so move it to a header where it's shared. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 8626bdd3249a..966c3a40039c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -59,6 +59,7 @@ #include /* We need tq_struct. */ #include /* We need sctp* header structs. */ #include /* We need auth specific structs */ +#include /* For inet_skb_parm */ /* A convenience structure for handling sockaddr structures. * We should wean ourselves off this. 
@@ -1092,6 +1093,20 @@ static inline void sctp_outq_cork(struct sctp_outq *q) q->cork = 1; } +/* SCTP skb control block. + * sctp_input_cb is currently used on rx and sock rx queue + */ +struct sctp_input_cb { + union { + struct inet_skb_parm h4; +#if IS_ENABLED(CONFIG_IPV6) + struct inet6_skb_parm h6; +#endif + } header; + struct sctp_chunk *chunk; +}; +#define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { -- cgit From f5d258e60722142e88cb6f0f337d78bca67cf973 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:56 -0300 Subject: sctp: reorder sctp_ulpevent and shrink msg_flags The next patch needs 8 bytes in there. sctp_ulpevent has a hole due to bad alignment; msg_flags is using 4 bytes while it actually uses only 2, so we shrink it, and iif member (4 bytes) which can be easily fetched from another place once the next patch is there, so we remove it and thus creating space for 8 bytes. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/ulpevent.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net/sctp') diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index cccdcfd14973..aa342645dbce 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,15 +48,15 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; - __u16 stream; - __u16 ssn; - __u16 flags; + unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int msg_flags; int iif; - unsigned int rmem_len; + __u16 stream; + __u16 ssn; + __u16 flags; + __u16 msg_flags; }; /* Retrieve the skb this event sits inside of. 
*/ -- cgit From 1f45f78f8e511203f03138f2ccde3d2cf90d2cbf Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:57 -0300 Subject: sctp: allow GSO frags to access the chunk too SCTP will try to access original IP headers on sctp_recvmsg in order to copy the addresses used. There are also other places that do similar access to IP or even SCTP headers. But after 90017accff61 ("sctp: Add GSO support") they aren't always there because they are only present in the header skb. SCTP handles the queueing of incoming data by cloning the incoming skb and limiting to only the relevant payload. This clone has its cb updated to something different and it's then queued on socket rx queue. Thus we need to fix this in two places. For rx path, not related to socket queue yet, this patch uses a partially copied sctp_input_cb for such GSO frags. This restores the ability to access the headers for this part of the code. Regarding the socket rx queue, it removes iif member from sctp_ulpevent and also adds a chunk pointer to it. With these changes we're always able to reach the headers again. The biggest change here is that now the sctp_chunk struct and the original skb are only freed after the application consumed the buffer. Note however that the original payload was already like this due to the skb cloning. For iif, SCTP's IPv4 code doesn't use it, so no change is necessary. IPv6 now can fetch it directly from original's IPv6 CB as the original skb is still accessible. In the future we probably can simplify sctp_v*_skb_iif() stuff, as sctp_v4_skb_iif() was called but its return value was not used, and now it's not even called, but such cleanup is out of scope for this change. Fixes: 90017accff61 ("sctp: Add GSO support") Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 7 +++++++ include/net/sctp/ulpevent.h | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 966c3a40039c..f6f201de6fa4 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1107,6 +1107,13 @@ struct sctp_input_cb { }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) +static inline const struct sk_buff *sctp_gso_headskb(const struct sk_buff *skb) +{ + const struct sctp_chunk *chunk = SCTP_INPUT_CB(skb)->chunk; + + return chunk->head_skb ? : skb; +} + /* These bind address data fields common between endpoints and associations */ struct sctp_bind_addr { diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index aa342645dbce..2c098cd7e7e2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -48,11 +48,11 @@ */ struct sctp_ulpevent { struct sctp_association *asoc; + struct sctp_chunk *chunk; unsigned int rmem_len; __u32 ppid; __u32 tsn; __u32 cumtsn; - int iif; __u16 stream; __u16 ssn; __u16 flags; -- cgit From e7487c86dc5c4a528a7dbd9dc14f453a0de61a84 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 13 Jul 2016 15:08:58 -0300 Subject: sctp: avoid identifying address family many times for a chunk Identifying address family operations during rx path is not something expensive but it's ugly to the eye to have it done multiple times, especially when we already validated it during initial rx processing. This patch takes advantage of the now shared sctp_input_cb and makes the pointer to the operations readily available. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. 
Miller --- include/net/sctp/structs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net/sctp') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f6f201de6fa4..ce93c4b10d26 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -1104,6 +1104,7 @@ struct sctp_input_cb { #endif } header; struct sctp_chunk *chunk; + struct sctp_af *af; }; #define SCTP_INPUT_CB(__skb) ((struct sctp_input_cb *)&((__skb)->cb[0])) -- cgit