summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/netdev_features.h4
-rw-r--r--include/linux/netdevice.h1
-rw-r--r--include/linux/skbuff.h4
-rw-r--r--include/net/fou.h38
-rw-r--r--include/net/gue.h103
-rw-r--r--include/uapi/linux/if_tunnel.h1
-rw-r--r--net/core/skbuff.c4
-rw-r--r--net/ipv4/Kconfig9
-rw-r--r--net/ipv4/af_inet.c1
-rw-r--r--net/ipv4/fou.c388
-rw-r--r--net/ipv4/ip_tunnel.c61
-rw-r--r--net/ipv4/tcp_offload.c1
-rw-r--r--net/ipv4/udp_offload.c66
-rw-r--r--net/ipv6/ip6_offload.c1
-rw-r--r--net/ipv6/udp_offload.c1
15 files changed, 565 insertions, 118 deletions
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index dcfdecbfa0b7..8c94b07e654a 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -48,8 +48,9 @@ enum {
NETIF_F_GSO_UDP_TUNNEL_BIT, /* ... UDP TUNNEL with TSO */
NETIF_F_GSO_UDP_TUNNEL_CSUM_BIT,/* ... UDP TUNNEL with TSO & CSUM */
NETIF_F_GSO_MPLS_BIT, /* ... MPLS segmentation */
+ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */
/**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */
- NETIF_F_GSO_MPLS_BIT,
+ NETIF_F_GSO_TUNNEL_REMCSUM_BIT,
NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */
NETIF_F_SCTP_CSUM_BIT, /* SCTP checksum offload */
@@ -119,6 +120,7 @@ enum {
#define NETIF_F_GSO_UDP_TUNNEL __NETIF_F(GSO_UDP_TUNNEL)
#define NETIF_F_GSO_UDP_TUNNEL_CSUM __NETIF_F(GSO_UDP_TUNNEL_CSUM)
#define NETIF_F_GSO_MPLS __NETIF_F(GSO_MPLS)
+#define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM)
#define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER)
#define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX)
#define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5ed05bd764dc..4767f546d7c0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3584,6 +3584,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type)
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL != (NETIF_F_GSO_UDP_TUNNEL >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_UDP_TUNNEL_CSUM != (NETIF_F_GSO_UDP_TUNNEL_CSUM >> NETIF_F_GSO_SHIFT));
BUILD_BUG_ON(SKB_GSO_MPLS != (NETIF_F_GSO_MPLS >> NETIF_F_GSO_SHIFT));
+ BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT));
return (features & feature) == feature;
}
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 5ad9675b6fe1..74ed34413969 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -373,6 +373,7 @@ enum {
SKB_GSO_MPLS = 1 << 12,
+ SKB_GSO_TUNNEL_REMCSUM = 1 << 13,
};
#if BITS_PER_LONG > 32
@@ -603,7 +604,8 @@ struct sk_buff {
#endif
__u8 ipvs_property:1;
__u8 inner_protocol_type:1;
- /* 4 or 6 bit hole */
+ __u8 remcsum_offload:1;
+ /* 3 or 5 bit hole */
#ifdef CONFIG_NET_SCHED
__u16 tc_index; /* traffic control index */
diff --git a/include/net/fou.h b/include/net/fou.h
new file mode 100644
index 000000000000..25b26ffcf1df
--- /dev/null
+++ b/include/net/fou.h
@@ -0,0 +1,38 @@
+#ifndef __NET_FOU_H
+#define __NET_FOU_H
+
+#include <linux/skbuff.h>
+
+#include <net/flow.h>
+#include <net/gue.h>
+#include <net/ip_tunnels.h>
+#include <net/udp.h>
+
+int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4);
+int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4);
+
+static size_t fou_encap_hlen(struct ip_tunnel_encap *e)
+{
+ return sizeof(struct udphdr);
+}
+
+static size_t gue_encap_hlen(struct ip_tunnel_encap *e)
+{
+ size_t len;
+ bool need_priv = false;
+
+ len = sizeof(struct udphdr) + sizeof(struct guehdr);
+
+ if (e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) {
+ len += GUE_PLEN_REMCSUM;
+ need_priv = true;
+ }
+
+ len += need_priv ? GUE_LEN_PRIV : 0;
+
+ return len;
+}
+
+#endif
diff --git a/include/net/gue.h b/include/net/gue.h
index b6c332788084..3f28ec7f1c7f 100644
--- a/include/net/gue.h
+++ b/include/net/gue.h
@@ -1,23 +1,116 @@
#ifndef __NET_GUE_H
#define __NET_GUE_H
+/* Definitions for the GUE header, standard and private flags, lengths
+ * of optional fields are below.
+ *
+ * Diagram of GUE header:
+ *
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |Ver|C| Hlen | Proto/ctype | Standard flags |P|
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * ~ Fields (optional) ~
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | Private flags (optional, P bit is set) |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * | |
+ * ~ Private fields (optional) ~
+ * | |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * C bit indicates contol message when set, data message when unset.
+ * For a control message, proto/ctype is interpreted as a type of
+ * control message. For data messages, proto/ctype is the IP protocol
+ * of the next header.
+ *
+ * P bit indicates private flags field is present. The private flags
+ * may refer to options placed after this field.
+ */
+
struct guehdr {
union {
struct {
#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 hlen:4,
- version:4;
+ __u8 hlen:5,
+ control:1,
+ version:2;
#elif defined (__BIG_ENDIAN_BITFIELD)
- __u8 version:4,
- hlen:4;
+ __u8 version:2,
+ control:1,
+ hlen:5;
#else
#error "Please fix <asm/byteorder.h>"
#endif
- __u8 next_hdr;
+ __u8 proto_ctype;
__u16 flags;
};
__u32 word;
};
};
+/* Standard flags in GUE header */
+
+#define GUE_FLAG_PRIV htons(1<<0) /* Private flags are in options */
+#define GUE_LEN_PRIV 4
+
+#define GUE_FLAGS_ALL (GUE_FLAG_PRIV)
+
+/* Private flags in the private option extension */
+
+#define GUE_PFLAG_REMCSUM htonl(1 << 31)
+#define GUE_PLEN_REMCSUM 4
+
+#define GUE_PFLAGS_ALL (GUE_PFLAG_REMCSUM)
+
+/* Functions to compute options length corresponding to flags.
+ * If we ever have a lot of flags this can be potentially be
+ * converted to a more optimized algorithm (table lookup
+ * for instance).
+ */
+static inline size_t guehdr_flags_len(__be16 flags)
+{
+ return ((flags & GUE_FLAG_PRIV) ? GUE_LEN_PRIV : 0);
+}
+
+static inline size_t guehdr_priv_flags_len(__be32 flags)
+{
+ return 0;
+}
+
+/* Validate standard and private flags. Returns non-zero (meaning invalid)
+ * if there is an unknown standard or private flags, or the options length for
+ * the flags exceeds the options length specific in hlen of the GUE header.
+ */
+static inline int validate_gue_flags(struct guehdr *guehdr,
+ size_t optlen)
+{
+ size_t len;
+ __be32 flags = guehdr->flags;
+
+ if (flags & ~GUE_FLAGS_ALL)
+ return 1;
+
+ len = guehdr_flags_len(flags);
+ if (len > optlen)
+ return 1;
+
+ if (flags & GUE_FLAG_PRIV) {
+ /* Private flags are last four bytes accounted in
+ * guehdr_flags_len
+ */
+ flags = *(__be32 *)((void *)&guehdr[1] + len - GUE_LEN_PRIV);
+
+ if (flags & ~GUE_PFLAGS_ALL)
+ return 1;
+
+ len += guehdr_priv_flags_len(flags);
+ if (len > optlen)
+ return 1;
+ }
+
+ return 0;
+}
+
#endif
diff --git a/include/uapi/linux/if_tunnel.h b/include/uapi/linux/if_tunnel.h
index 280d9e092283..bd3cc11a431f 100644
--- a/include/uapi/linux/if_tunnel.h
+++ b/include/uapi/linux/if_tunnel.h
@@ -69,6 +69,7 @@ enum tunnel_encap_types {
#define TUNNEL_ENCAP_FLAG_CSUM (1<<0)
#define TUNNEL_ENCAP_FLAG_CSUM6 (1<<1)
+#define TUNNEL_ENCAP_FLAG_REMCSUM (1<<2)
/* SIT-mode i_flags */
#define SIT_ISATAP 0x0001
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index e48e5c02e877..700189604f3d 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3013,7 +3013,7 @@ struct sk_buff *skb_segment(struct sk_buff *head_skb,
if (nskb->len == len + doffset)
goto perform_csum_check;
- if (!sg) {
+ if (!sg && !nskb->remcsum_offload) {
nskb->ip_summed = CHECKSUM_NONE;
nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
@@ -3085,7 +3085,7 @@ skip_fraglist:
nskb->truesize += nskb->data_len;
perform_csum_check:
- if (!csum) {
+ if (!csum && !nskb->remcsum_offload) {
nskb->csum = skb_checksum(nskb, doffset,
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index e682b48e0709..bd2901604842 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -322,6 +322,15 @@ config NET_FOU
network mechanisms and optimizations for UDP (such as ECMP
and RSS) can be leveraged to provide better service.
+config NET_FOU_IP_TUNNELS
+ bool "IP: FOU encapsulation of IP tunnels"
+ depends on NET_IPIP || NET_IPGRE || IPV6_SIT
+ select NET_FOU
+ ---help---
+ Allow configuration of FOU or GUE encapsulation for IP tunnels.
+ When this option is enabled IP tunnels can be configured to use
+ FOU or GUE encapsulation.
+
config GENEVE
tristate "Generic Network Virtualization Encapsulation (Geneve)"
depends on INET
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 8b7fe5b03906..ed2c672c5b01 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1222,6 +1222,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb,
SKB_GSO_TCPV6 |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_MPLS |
0)))
goto out;
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 32e78924e246..740ae099a0d9 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -38,21 +38,17 @@ static inline struct fou *fou_from_sock(struct sock *sk)
return sk->sk_user_data;
}
-static int fou_udp_encap_recv_deliver(struct sk_buff *skb,
- u8 protocol, size_t len)
+static void fou_recv_pull(struct sk_buff *skb, size_t len)
{
struct iphdr *iph = ip_hdr(skb);
/* Remove 'len' bytes from the packet (UDP header and
- * FOU header if present), modify the protocol to the one
- * we found, and then call rcv_encap.
+ * FOU header if present).
*/
iph->tot_len = htons(ntohs(iph->tot_len) - len);
__skb_pull(skb, len);
skb_postpull_rcsum(skb, udp_hdr(skb), len);
skb_reset_transport_header(skb);
-
- return -protocol;
}
static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
@@ -62,16 +58,78 @@ static int fou_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!fou)
return 1;
- return fou_udp_encap_recv_deliver(skb, fou->protocol,
- sizeof(struct udphdr));
+ fou_recv_pull(skb, sizeof(struct udphdr));
+
+ return -fou->protocol;
+}
+
+static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr,
+ void *data, int hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload) {
+ /* Already processed in GRO path */
+ skb->remcsum_offload = 0;
+ return guehdr;
+ }
+
+ if (start > skb->len - hdrlen ||
+ offset > skb->len - hdrlen - sizeof(u16))
+ return NULL;
+
+ if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE))
+ __skb_checksum_complete(skb);
+
+ plen = hdrlen + offset + sizeof(u16);
+ if (!pskb_may_pull(skb, plen))
+ return NULL;
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ if (ipproto == IPPROTO_IP && sizeof(struct iphdr) < plen) {
+ struct iphdr *ip = (struct iphdr *)(skb->data + hdrlen);
+
+ /* If next header happens to be IP we can skip that for the
+ * checksum calculation since the IP header checksum is zero
+ * if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(skb->csum, skb_checksum(skb, poffset + hdrlen,
+ start - poffset - hdrlen, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(skb->data + hdrlen + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+
+ return guehdr;
+}
+
+static int gue_control_message(struct sk_buff *skb, struct guehdr *guehdr)
+{
+ /* No support yet */
+ kfree_skb(skb);
+ return 0;
}
static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
{
struct fou *fou = fou_from_sock(sk);
- size_t len;
+ size_t len, optlen, hdrlen;
struct guehdr *guehdr;
- struct udphdr *uh;
+ void *data;
+ u16 doffset = 0;
if (!fou)
return 1;
@@ -80,25 +138,61 @@ static int gue_udp_recv(struct sock *sk, struct sk_buff *skb)
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
+
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- len += guehdr->hlen << 2;
if (!pskb_may_pull(skb, len))
goto drop;
- uh = udp_hdr(skb);
- guehdr = (struct guehdr *)&uh[1];
+ /* guehdr may change after pull */
+ guehdr = (struct guehdr *)&udp_hdr(skb)[1];
- if (guehdr->version != 0)
- goto drop;
+ hdrlen = sizeof(struct guehdr) + optlen;
- if (guehdr->flags) {
- /* No support yet */
+ if (guehdr->version != 0 || validate_gue_flags(guehdr, optlen))
goto drop;
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(skb)->tot_len) - len);
+
+ /* Pull UDP header now, skb->data points to guehdr */
+ __skb_pull(skb, sizeof(struct udphdr));
+
+ /* Pull csum through the guehdr now . This can be used if
+ * there is a remote checksum offload.
+ */
+ skb_postpull_rcsum(skb, udp_hdr(skb), len);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_remcsum(skb, guehdr, data + doffset,
+ hdrlen, guehdr->proto_ctype);
+ if (!guehdr)
+ goto drop;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
- return fou_udp_encap_recv_deliver(skb, guehdr->next_hdr, len);
+ if (unlikely(guehdr->control))
+ return gue_control_message(skb, guehdr);
+
+ __skb_pull(skb, hdrlen);
+ skb_reset_transport_header(skb);
+
+ return -guehdr->proto_ctype;
+
drop:
kfree_skb(skb);
return 0;
@@ -147,6 +241,66 @@ out_unlock:
return err;
}
+static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off,
+ struct guehdr *guehdr, void *data,
+ size_t hdrlen, u8 ipproto)
+{
+ __be16 *pd = data;
+ u16 start = ntohs(pd[0]);
+ u16 offset = ntohs(pd[1]);
+ u16 poffset = 0;
+ u16 plen;
+ void *ptr;
+ __wsum csum, delta;
+ __sum16 *psum;
+
+ if (skb->remcsum_offload)
+ return guehdr;
+
+ if (start > skb_gro_len(skb) - hdrlen ||
+ offset > skb_gro_len(skb) - hdrlen - sizeof(u16) ||
+ !NAPI_GRO_CB(skb)->csum_valid || skb->remcsum_offload)
+ return NULL;
+
+ plen = hdrlen + offset + sizeof(u16);
+
+ /* Pull checksum that will be written */
+ if (skb_gro_header_hard(skb, off + plen)) {
+ guehdr = skb_gro_header_slow(skb, off + plen, off);
+ if (!guehdr)
+ return NULL;
+ }
+
+ ptr = (void *)guehdr + hdrlen;
+
+ if (ipproto == IPPROTO_IP &&
+ (hdrlen + sizeof(struct iphdr) < plen)) {
+ struct iphdr *ip = (struct iphdr *)(ptr + hdrlen);
+
+ /* If next header happens to be IP we can skip
+ * that for the checksum calculation since the
+ * IP header checksum is zero if correct.
+ */
+ poffset = ip->ihl * 4;
+ }
+
+ csum = csum_sub(NAPI_GRO_CB(skb)->csum,
+ csum_partial(ptr + poffset, start - poffset, 0));
+
+ /* Set derived checksum in packet */
+ psum = (__sum16 *)(ptr + offset);
+ delta = csum_sub(csum_fold(csum), *psum);
+ *psum = csum_fold(csum);
+
+ /* Adjust skb->csum since we changed the packet */
+ skb->csum = csum_add(skb->csum, delta);
+ NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta);
+
+ skb->remcsum_offload = 1;
+
+ return guehdr;
+}
+
static struct sk_buff **gue_gro_receive(struct sk_buff **head,
struct sk_buff *skb)
{
@@ -154,38 +308,64 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
const struct net_offload *ops;
struct sk_buff **pp = NULL;
struct sk_buff *p;
- u8 proto;
struct guehdr *guehdr;
- unsigned int hlen, guehlen;
- unsigned int off;
+ size_t len, optlen, hdrlen, off;
+ void *data;
+ u16 doffset = 0;
int flush = 1;
off = skb_gro_offset(skb);
- hlen = off + sizeof(*guehdr);
+ len = off + sizeof(*guehdr);
+
guehdr = skb_gro_header_fast(skb, off);
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
if (unlikely(!guehdr))
goto out;
}
- proto = guehdr->next_hdr;
+ optlen = guehdr->hlen << 2;
+ len += optlen;
- rcu_read_lock();
- offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
- ops = rcu_dereference(offloads[proto]);
- if (WARN_ON(!ops || !ops->callbacks.gro_receive))
- goto out_unlock;
+ if (skb_gro_header_hard(skb, len)) {
+ guehdr = skb_gro_header_slow(skb, len, off);
+ if (unlikely(!guehdr))
+ goto out;
+ }
- guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
+ if (unlikely(guehdr->control) || guehdr->version != 0 ||
+ validate_gue_flags(guehdr, optlen))
+ goto out;
- hlen = off + guehlen;
- if (skb_gro_header_hard(skb, hlen)) {
- guehdr = skb_gro_header_slow(skb, hlen, off);
- if (unlikely(!guehdr))
- goto out_unlock;
+ hdrlen = sizeof(*guehdr) + optlen;
+
+ /* Adjust NAPI_GRO_CB(skb)->csum to account for guehdr,
+ * this is needed if there is a remote checkcsum offload.
+ */
+ skb_gro_postpull_rcsum(skb, guehdr, hdrlen);
+
+ data = &guehdr[1];
+
+ if (guehdr->flags & GUE_FLAG_PRIV) {
+ __be32 flags = *(__be32 *)(data + doffset);
+
+ doffset += GUE_LEN_PRIV;
+
+ if (flags & GUE_PFLAG_REMCSUM) {
+ guehdr = gue_gro_remcsum(skb, off, guehdr,
+ data + doffset, hdrlen,
+ guehdr->proto_ctype);
+ if (!guehdr)
+ goto out;
+
+ data = &guehdr[1];
+
+ doffset += GUE_PLEN_REMCSUM;
+ }
}
+ skb_gro_pull(skb, hdrlen);
+
flush = 0;
for (p = *head; p; p = p->next) {
@@ -197,7 +377,7 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
guehdr2 = (struct guehdr *)(p->data + off);
/* Compare base GUE header to be equal (covers
- * hlen, version, next_hdr, and flags.
+ * hlen, version, proto_ctype, and flags.
*/
if (guehdr->word != guehdr2->word) {
NAPI_GRO_CB(p)->same_flow = 0;
@@ -212,10 +392,11 @@ static struct sk_buff **gue_gro_receive(struct sk_buff **head,
}
}
- skb_gro_pull(skb, guehlen);
-
- /* Adjusted NAPI_GRO_CB(skb)->csum after skb_gro_pull()*/
- skb_gro_postpull_rcsum(skb, guehdr, guehlen);
+ rcu_read_lock();
+ offloads = NAPI_GRO_CB(skb)->is_ipv6 ? inet6_offloads : inet_offloads;
+ ops = rcu_dereference(offloads[guehdr->proto_ctype]);
+ if (WARN_ON(!ops || !ops->callbacks.gro_receive))
+ goto out_unlock;
pp = ops->callbacks.gro_receive(head, skb);
@@ -236,7 +417,7 @@ static int gue_gro_complete(struct sk_buff *skb, int nhoff)
u8 proto;
int err = -ENOENT;
- proto = guehdr->next_hdr;
+ proto = guehdr->proto_ctype;
guehlen = sizeof(*guehdr) + (guehdr->hlen << 2);
@@ -487,6 +668,125 @@ static const struct genl_ops fou_nl_ops[] = {
},
};
+static void fou_build_udp(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ struct flowi4 *fl4, u8 *protocol, __be16 sport)
+{
+ struct udphdr *uh;
+
+ skb_push(skb, sizeof(struct udphdr));
+ skb_reset_transport_header(skb);
+
+ uh = udp_hdr(skb);
+
+ uh->dest = e->dport;
+ uh->source = sport;
+ uh->len = htons(skb->len);
+ uh->check = 0;
+ udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
+ fl4->saddr, fl4->daddr, skb->len);
+
+ *protocol = IPPROTO_UDP;
+}
+
+int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ __be16 sport;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(fou_build_header);
+
+int gue_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
+ u8 *protocol, struct flowi4 *fl4)
+{
+ bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
+ int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
+ struct guehdr *guehdr;
+ size_t hdrlen, optlen = 0;
+ __be16 sport;
+ void *data;
+ bool need_priv = false;
+
+ if ((e->flags & TUNNEL_ENCAP_FLAG_REMCSUM) &&
+ skb->ip_summed == CHECKSUM_PARTIAL) {
+ csum = false;
+ optlen += GUE_PLEN_REMCSUM;
+ type |= SKB_GSO_TUNNEL_REMCSUM;
+ need_priv = true;
+ }
+
+ optlen += need_priv ? GUE_LEN_PRIV : 0;
+
+ skb = iptunnel_handle_offloads(skb, csum, type);
+
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+ /* Get source port (based on flow hash) before skb_push */
+ sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
+ skb, 0, 0, false);
+
+ hdrlen = sizeof(struct guehdr) + optlen;
+
+ skb_push(skb, hdrlen);
+
+ guehdr = (struct guehdr *)skb->data;
+
+ guehdr->control = 0;
+ guehdr->version = 0;
+ guehdr->hlen = optlen >> 2;
+ guehdr->flags = 0;
+ guehdr->proto_ctype = *protocol;
+
+ data = &guehdr[1];
+
+ if (need_priv) {
+ __be32 *flags = data;
+
+ guehdr->flags |= GUE_FLAG_PRIV;
+ *flags = 0;
+ data += GUE_LEN_PRIV;
+
+ if (type & SKB_GSO_TUNNEL_REMCSUM) {
+ u16 csum_start = skb_checksum_start_offset(skb);
+ __be16 *pd = data;
+
+ if (csum_start < hdrlen)
+ return -EINVAL;
+
+ csum_start -= hdrlen;
+ pd[0] = htons(csum_start);
+ pd[1] = htons(csum_start + skb->csum_offset);
+
+ if (!skb_is_gso(skb)) {
+ skb->ip_summed = CHECKSUM_NONE;
+ skb->encapsulation = 0;
+ }
+
+ *flags |= GUE_PFLAG_REMCSUM;
+ data += GUE_PLEN_REMCSUM;
+ }
+
+ }
+
+ fou_build_udp(skb, e, fl4, protocol, sport);
+
+ return 0;
+}
+EXPORT_SYMBOL(gue_build_header);
+
static int __init fou_init(void)
{
int ret;
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index 0bb8e141eacc..c3587e1c8b82 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -56,7 +56,10 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
-#include <net/gue.h>
+
+#if IS_ENABLED(CONFIG_NET_FOU)
+#include <net/fou.h>
+#endif
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -494,10 +497,12 @@ static int ip_encap_hlen(struct ip_tunnel_encap *e)
switch (e->type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
- return sizeof(struct udphdr);
+ return fou_encap_hlen(e);
case TUNNEL_ENCAP_GUE:
- return sizeof(struct udphdr) + sizeof(struct guehdr);
+ return gue_encap_hlen(e);
+#endif
default:
return -EINVAL;
}
@@ -526,60 +531,18 @@ int ip_tunnel_encap_setup(struct ip_tunnel *t,
}
EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
-static int fou_build_header(struct sk_buff *skb, struct ip_tunnel_encap *e,
- size_t hdr_len, u8 *protocol, struct flowi4 *fl4)
-{
- struct udphdr *uh;
- __be16 sport;
- bool csum = !!(e->flags & TUNNEL_ENCAP_FLAG_CSUM);
- int type = csum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
-
- skb = iptunnel_handle_offloads(skb, csum, type);
-
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- /* Get length and hash before making space in skb */
-
- sport = e->sport ? : udp_flow_src_port(dev_net(skb->dev),
- skb, 0, 0, false);
-
- skb_push(skb, hdr_len);
-
- skb_reset_transport_header(skb);
- uh = udp_hdr(skb);
-
- if (e->type == TUNNEL_ENCAP_GUE) {
- struct guehdr *guehdr = (struct guehdr *)&uh[1];
-
- guehdr->version = 0;
- guehdr->hlen = 0;
- guehdr->flags = 0;
- guehdr->next_hdr = *protocol;
- }
-
- uh->dest = e->dport;
- uh->source = sport;
- uh->len = htons(skb->len);
- uh->check = 0;
- udp_set_csum(!(e->flags & TUNNEL_ENCAP_FLAG_CSUM), skb,
- fl4->saddr, fl4->daddr, skb->len);
-
- *protocol = IPPROTO_UDP;
-
- return 0;
-}
-
int ip_tunnel_encap(struct sk_buff *skb, struct ip_tunnel *t,
u8 *protocol, struct flowi4 *fl4)
{
switch (t->encap.type) {
case TUNNEL_ENCAP_NONE:
return 0;
+#if IS_ENABLED(CONFIG_NET_FOU)
case TUNNEL_ENCAP_FOU:
+ return fou_build_header(skb, &t->encap, protocol, fl4);
case TUNNEL_ENCAP_GUE:
- return fou_build_header(skb, &t->encap, t->encap_hlen,
- protocol, fl4);
+ return gue_build_header(skb, &t->encap, protocol, fl4);
+#endif
default:
return -EINVAL;
}
diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c
index 5b90f2f447a5..a1b2a5624f91 100644
--- a/net/ipv4/tcp_offload.c
+++ b/net/ipv4/tcp_offload.c
@@ -97,6 +97,7 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb,
SKB_GSO_MPLS |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
0) ||
!(type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6))))
goto out;
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 6480cea7aa53..0a5a70d0e84c 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -29,7 +29,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
struct sk_buff *(*gso_inner_segment)(struct sk_buff *skb,
netdev_features_t features),
- __be16 new_protocol)
+ __be16 new_protocol, bool is_ipv6)
{
struct sk_buff *segs = ERR_PTR(-EINVAL);
u16 mac_offset = skb->mac_header;
@@ -39,7 +39,10 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t enc_features;
int udp_offset, outer_hlen;
unsigned int oldlen;
- bool need_csum;
+ bool need_csum = !!(skb_shinfo(skb)->gso_type &
+ SKB_GSO_UDP_TUNNEL_CSUM);
+ bool remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM);
+ bool offload_csum = false, dont_encap = (need_csum || remcsum);
oldlen = (u16)~skb->len;
@@ -52,10 +55,13 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
skb_set_network_header(skb, skb_inner_network_offset(skb));
skb->mac_len = skb_inner_network_offset(skb);
skb->protocol = new_protocol;
+ skb->encap_hdr_csum = need_csum;
+ skb->remcsum_offload = remcsum;
- need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM);
- if (need_csum)
- skb->encap_hdr_csum = 1;
+ /* Try to offload checksum if possible */
+ offload_csum = !!(need_csum &&
+ (skb->dev->features &
+ (is_ipv6 ? NETIF_F_V6_CSUM : NETIF_F_V4_CSUM)));
/* segment inner packet. */
enc_features = skb->dev->hw_enc_features & features;
@@ -72,11 +78,21 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
do {
struct udphdr *uh;
int len;
-
- skb_reset_inner_headers(skb);
- skb->encapsulation = 1;
+ __be32 delta;
+
+ if (dont_encap) {
+ skb->encapsulation = 0;
+ skb->ip_summed = CHECKSUM_NONE;
+ } else {
+ /* Only set up inner headers if we might be offloading
+ * inner checksum.
+ */
+ skb_reset_inner_headers(skb);
+ skb->encapsulation = 1;
+ }
skb->mac_len = mac_len;
+ skb->protocol = protocol;
skb_push(skb, outer_hlen);
skb_reset_mac_header(skb);
@@ -86,19 +102,36 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb,
uh = udp_hdr(skb);
uh->len = htons(len);
- if (need_csum) {
- __be32 delta = htonl(oldlen + len);
+ if (!need_csum)
+ continue;
- uh->check = ~csum_fold((__force __wsum)
- ((__force u32)uh->check +
- (__force u32)delta));
+ delta = htonl(oldlen + len);
+
+ uh->check = ~csum_fold((__force __wsum)
+ ((__force u32)uh->check +
+ (__force u32)delta));
+ if (offload_csum) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_transport_header(skb) - skb->head;
+ skb->csum_offset = offsetof(struct udphdr, check);
+ } else if (remcsum) {
+ /* Need to calculate checksum from scratch,
+ * inner checksums are never when doing
+ * remote_checksum_offload.
+ */
+
+ skb->csum = skb_checksum(skb, udp_offset,
+ skb->len - udp_offset,
+ 0);
+ uh->check = csum_fold(skb->csum);
+ if (uh->check == 0)
+ uh->check = CSUM_MANGLED_0;
+ } else {
uh->check = gso_make_checksum(skb, ~uh->check);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
-
- skb->protocol = protocol;
} while ((skb = skb->next));
out:
return segs;
@@ -134,7 +167,7 @@ struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
}
segs = __skb_udp_tunnel_segment(skb, features, gso_inner_segment,
- protocol);
+ protocol, is_ipv6);
out_unlock:
rcu_read_unlock();
@@ -172,6 +205,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_IPIP |
SKB_GSO_GRE | SKB_GSO_GRE_CSUM |
SKB_GSO_MPLS) ||
diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c
index a071563a7e6e..e9767079a360 100644
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -78,6 +78,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,
SKB_GSO_SIT |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_MPLS |
SKB_GSO_TCPV6 |
0)))
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index 6b8f543f6ac6..637ba2e438b7 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -42,6 +42,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
SKB_GSO_DODGY |
SKB_GSO_UDP_TUNNEL |
SKB_GSO_UDP_TUNNEL_CSUM |
+ SKB_GSO_TUNNEL_REMCSUM |
SKB_GSO_GRE |
SKB_GSO_GRE_CSUM |
SKB_GSO_IPIP |