summaryrefslogtreecommitdiff
path: root/net/ipv4/ip_gre.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_gre.c')
-rw-r--r--net/ipv4/ip_gre.c1349
1 files changed, 1186 insertions, 163 deletions
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 1f6eab66f7ce..761a53c6a89a 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1,13 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Linux NET3: GRE over IP protocol decoder.
*
* Authors: Alexey Kuznetsov (kuznet@ms2.inr.ac.ru)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -17,14 +12,14 @@
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/slab.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/if_arp.h>
-#include <linux/mroute.h>
+#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/in6.h>
#include <linux/inetdevice.h>
@@ -33,6 +28,7 @@
#include <linux/etherdevice.h>
#include <linux/if_ether.h>
+#include <net/flow.h>
#include <net/sock.h>
#include <net/ip.h>
#include <net/icmp.h>
@@ -47,12 +43,8 @@
#include <net/netns/generic.h>
#include <net/rtnetlink.h>
#include <net/gre.h>
-
-#if IS_ENABLED(CONFIG_IPV6)
-#include <net/ipv6.h>
-#include <net/ip6_fib.h>
-#include <net/ip6_route.h>
-#endif
+#include <net/dst_metadata.h>
+#include <net/erspan.h>
/*
Problems & solutions
@@ -116,10 +108,16 @@ module_param(log_ecn_error, bool, 0644);
MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
static struct rtnl_link_ops ipgre_link_ops __read_mostly;
+static const struct header_ops ipgre_header_ops;
+
static int ipgre_tunnel_init(struct net_device *dev);
+static void erspan_build_header(struct sk_buff *skb,
+ u32 id, u32 index,
+ bool truncate, bool is_ipv4);
-static int ipgre_net_id __read_mostly;
-static int gre_tap_net_id __read_mostly;
+static unsigned int ipgre_net_id __read_mostly;
+static unsigned int gre_tap_net_id __read_mostly;
+static unsigned int erspan_net_id __read_mostly;
static int ipgre_err(struct sk_buff *skb, u32 info,
const struct tnl_ptk_info *tpi)
@@ -145,17 +143,32 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
const int code = icmp_hdr(skb)->code;
struct ip_tunnel *t;
+ if (tpi->proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else if (tpi->proto == htons(ETH_P_ERSPAN) ||
+ tpi->proto == htons(ETH_P_ERSPAN2))
+ itn = net_generic(net, erspan_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ iph = (const struct iphdr *)(icmp_hdr(skb) + 1);
+ t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
+ iph->daddr, iph->saddr, tpi->key);
+
+ if (!t)
+ return -ENOENT;
+
switch (type) {
default:
case ICMP_PARAMETERPROB:
- return PACKET_RCVD;
+ return 0;
case ICMP_DEST_UNREACH:
switch (code) {
case ICMP_SR_FAILED:
case ICMP_PORT_UNREACH:
/* Impossible event. */
- return PACKET_RCVD;
+ return 0;
default:
/* All others are translated to HOST_UNREACH.
rfc2003 contains "deep thoughts" about NET_UNREACH,
@@ -164,63 +177,297 @@ static int ipgre_err(struct sk_buff *skb, u32 info,
break;
}
break;
+
case ICMP_TIME_EXCEEDED:
if (code != ICMP_EXC_TTL)
- return PACKET_RCVD;
+ return 0;
break;
case ICMP_REDIRECT:
break;
}
- if (tpi->proto == htons(ETH_P_TEB))
- itn = net_generic(net, gre_tap_net_id);
- else
- itn = net_generic(net, ipgre_net_id);
+#if IS_ENABLED(CONFIG_IPV6)
+ if (tpi->proto == htons(ETH_P_IPV6)) {
+ unsigned int data_len = 0;
- iph = (const struct iphdr *)skb->data;
- t = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
- iph->daddr, iph->saddr, tpi->key);
+ if (type == ICMP_TIME_EXCEEDED)
+ data_len = icmp_hdr(skb)->un.reserved[1] * 4; /* RFC 4884 4.1 */
- if (t == NULL)
- return PACKET_REJECT;
+ if (!ip6_err_gen_icmpv6_unreach(skb, iph->ihl * 4 + tpi->hdr_len,
+ type, data_len))
+ return 0;
+ }
+#endif
if (t->parms.iph.daddr == 0 ||
ipv4_is_multicast(t->parms.iph.daddr))
- return PACKET_RCVD;
+ return 0;
if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
- return PACKET_RCVD;
+ return 0;
if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
t->err_count++;
else
t->err_count = 1;
t->err_time = jiffies;
- return PACKET_RCVD;
+
+ return 0;
}
-static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi)
+static void gre_err(struct sk_buff *skb, u32 info)
+{
+ /* All the routers (except for Linux) return only
+ * 8 bytes of packet payload. It means, that precise relaying of
+ * ICMP in the real Internet is absolutely infeasible.
+ *
+ * Moreover, Cisco "wise men" put GRE key to the third word
+ * in GRE header. It makes impossible maintaining even soft
+ * state for keyed
+ * GRE tunnels with enabled checksum. Tell them "thank you".
+ *
+ * Well, I wonder, rfc1812 was written by Cisco employee,
+ * what the hell these idiots break standards established
+ * by themselves???
+ */
+
+ const struct iphdr *iph = (struct iphdr *)skb->data;
+ const int type = icmp_hdr(skb)->type;
+ const int code = icmp_hdr(skb)->code;
+ struct tnl_ptk_info tpi;
+
+ if (gre_parse_header(skb, &tpi, NULL, htons(ETH_P_IP),
+ iph->ihl * 4) < 0)
+ return;
+
+ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+ ipv4_update_pmtu(skb, dev_net(skb->dev), info,
+ skb->dev->ifindex, IPPROTO_GRE);
+ return;
+ }
+ if (type == ICMP_REDIRECT) {
+ ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex,
+ IPPROTO_GRE);
+ return;
+ }
+
+ ipgre_err(skb, info, &tpi);
+}
+
+static bool is_erspan_type1(int gre_hdr_len)
+{
+ /* Both ERSPAN type I (version 0) and type II (version 1) use
+ * protocol 0x88BE, but the type I has only 4-byte GRE header,
+ * while type II has 8-byte.
+ */
+ return gre_hdr_len == 4;
+}
+
+static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
+ int gre_hdr_len)
{
struct net *net = dev_net(skb->dev);
+ struct metadata_dst *tun_dst = NULL;
+ struct erspan_base_hdr *ershdr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
struct ip_tunnel_net *itn;
- const struct iphdr *iph;
struct ip_tunnel *tunnel;
+ const struct iphdr *iph;
+ struct erspan_md2 *md2;
+ int ver;
+ int len;
- if (tpi->proto == htons(ETH_P_TEB))
- itn = net_generic(net, gre_tap_net_id);
- else
- itn = net_generic(net, ipgre_net_id);
+ ip_tunnel_flags_copy(flags, tpi->flags);
+
+ itn = net_generic(net, erspan_net_id);
+ iph = ip_hdr(skb);
+ if (is_erspan_type1(gre_hdr_len)) {
+ ver = 0;
+ __set_bit(IP_TUNNEL_NO_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
+ iph->saddr, iph->daddr, 0);
+ } else {
+ if (unlikely(!pskb_may_pull(skb,
+ gre_hdr_len + sizeof(*ershdr))))
+ return PACKET_REJECT;
+
+ ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+ ver = ershdr->ver;
+ iph = ip_hdr(skb);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, flags,
+ iph->saddr, iph->daddr, tpi->key);
+ }
+
+ if (tunnel) {
+ if (is_erspan_type1(gre_hdr_len))
+ len = gre_hdr_len;
+ else
+ len = gre_hdr_len + erspan_hdr_len(ver);
+
+ if (unlikely(!pskb_may_pull(skb, len)))
+ return PACKET_REJECT;
+
+ if (__iptunnel_pull_header(skb,
+ len,
+ htons(ETH_P_TEB),
+ false, false) < 0)
+ goto drop;
+
+ if (tunnel->collect_md) {
+ struct erspan_metadata *pkt_md, *md;
+ struct ip_tunnel_info *info;
+ unsigned char *gh;
+ __be64 tun_id;
+
+ __set_bit(IP_TUNNEL_KEY_BIT, tpi->flags);
+ ip_tunnel_flags_copy(flags, tpi->flags);
+ tun_id = key32_to_tunnel_id(tpi->key);
+
+ tun_dst = ip_tun_rx_dst(skb, flags,
+ tun_id, sizeof(*md));
+ if (!tun_dst)
+ return PACKET_REJECT;
+
+ /* skb can be uncloned in __iptunnel_pull_header, so
+ * old pkt_md is no longer valid and we need to reset
+ * it
+ */
+ gh = skb_network_header(skb) +
+ skb_network_header_len(skb);
+ pkt_md = (struct erspan_metadata *)(gh + gre_hdr_len +
+ sizeof(*ershdr));
+ md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+ md->version = ver;
+ md2 = &md->u.md2;
+ memcpy(md2, pkt_md, ver == 1 ? ERSPAN_V1_MDSIZE :
+ ERSPAN_V2_MDSIZE);
+
+ info = &tun_dst->u.tun_info;
+ __set_bit(IP_TUNNEL_ERSPAN_OPT_BIT,
+ info->key.tun_flags);
+ info->options_len = sizeof(*md);
+ }
+
+ skb_reset_mac_header(skb);
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
+ return PACKET_RCVD;
+ }
+ return PACKET_REJECT;
+
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
+static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ struct ip_tunnel_net *itn, int hdr_len, bool raw_proto)
+{
+ struct metadata_dst *tun_dst = NULL;
+ const struct iphdr *iph;
+ struct ip_tunnel *tunnel;
iph = ip_hdr(skb);
tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, tpi->flags,
iph->saddr, iph->daddr, tpi->key);
if (tunnel) {
- ip_tunnel_rcv(tunnel, skb, tpi, log_ecn_error);
+ const struct iphdr *tnl_params;
+
+ if (__iptunnel_pull_header(skb, hdr_len, tpi->proto,
+ raw_proto, false) < 0)
+ goto drop;
+
+ /* Special case for ipgre_header_parse(), which expects the
+ * mac_header to point to the outer IP header.
+ */
+ if (tunnel->dev->header_ops == &ipgre_header_ops)
+ skb_pop_mac_header(skb);
+ else
+ skb_reset_mac_header(skb);
+
+ tnl_params = &tunnel->parms.iph;
+ if (tunnel->collect_md || tnl_params->daddr == 0) {
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+ __be64 tun_id;
+
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ ip_tunnel_flags_and(flags, tpi->flags, flags);
+
+ tun_id = key32_to_tunnel_id(tpi->key);
+ tun_dst = ip_tun_rx_dst(skb, flags, tun_id, 0);
+ if (!tun_dst)
+ return PACKET_REJECT;
+ }
+
+ ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error);
return PACKET_RCVD;
}
- return PACKET_REJECT;
+ return PACKET_NEXT;
+
+drop:
+ kfree_skb(skb);
+ return PACKET_RCVD;
+}
+
+static int ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi,
+ int hdr_len)
+{
+ struct net *net = dev_net(skb->dev);
+ struct ip_tunnel_net *itn;
+ int res;
+
+ if (tpi->proto == htons(ETH_P_TEB))
+ itn = net_generic(net, gre_tap_net_id);
+ else
+ itn = net_generic(net, ipgre_net_id);
+
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, false);
+ if (res == PACKET_NEXT && tpi->proto == htons(ETH_P_TEB)) {
+ /* ipgre tunnels in collect metadata mode should receive
+ * also ETH_P_TEB traffic.
+ */
+ itn = net_generic(net, ipgre_net_id);
+ res = __ipgre_rcv(skb, tpi, itn, hdr_len, true);
+ }
+ return res;
+}
+
+static int gre_rcv(struct sk_buff *skb)
+{
+ struct tnl_ptk_info tpi;
+ bool csum_err = false;
+ int hdr_len;
+
+#ifdef CONFIG_NET_IPGRE_BROADCAST
+ if (ipv4_is_multicast(ip_hdr(skb)->daddr)) {
+ /* Looped back packet, drop it! */
+ if (rt_is_output_route(skb_rtable(skb)))
+ goto drop;
+ }
+#endif
+
+ hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0);
+ if (hdr_len < 0)
+ goto drop;
+
+ if (unlikely(tpi.proto == htons(ETH_P_ERSPAN) ||
+ tpi.proto == htons(ETH_P_ERSPAN2))) {
+ if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+ return 0;
+ goto out;
+ }
+
+ if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD)
+ return 0;
+
+out:
+ icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
+drop:
+ kfree_skb(skb);
+ return 0;
}
static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -228,43 +475,209 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
__be16 proto)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- struct tnl_ptk_info tpi;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
- tpi.flags = tunnel->parms.o_flags;
- tpi.proto = proto;
- tpi.key = tunnel->parms.o_key;
- if (tunnel->parms.o_flags & TUNNEL_SEQ)
- tunnel->o_seqno++;
- tpi.seq = htonl(tunnel->o_seqno);
+ ip_tunnel_flags_copy(flags, tunnel->parms.o_flags);
/* Push GRE header. */
- gre_build_header(skb, &tpi, tunnel->hlen);
+ gre_build_header(skb, tunnel->tun_hlen,
+ flags, proto, tunnel->parms.o_key,
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
+static int gre_handle_offloads(struct sk_buff *skb, bool csum)
+{
+ return iptunnel_handle_offloads(skb, csum ? SKB_GSO_GRE_CSUM : SKB_GSO_GRE);
+}
+
+static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
+ __be16 proto)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ int tunnel_hlen;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ tunnel_hlen = gre_calc_hlen(key->tun_flags);
+
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto err_free_skb;
+
+ /* Push Tunnel header. */
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
+ goto err_free_skb;
+
+ __set_bit(IP_TUNNEL_CSUM_BIT, flags);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ ip_tunnel_flags_and(flags, tun_info->key.tun_flags, flags);
+
+ gre_build_header(skb, tunnel_hlen, flags, proto,
+ tunnel_id_to_key32(tun_info->key.tun_id),
+ test_bit(IP_TUNNEL_SEQ_BIT, flags) ?
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
+
+ ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
+
+ return;
+
+err_free_skb:
+ kfree_skb(skb);
+ DEV_STATS_INC(dev, tx_dropped);
+}
+
+static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
+ struct ip_tunnel_info *tun_info;
+ const struct ip_tunnel_key *key;
+ struct erspan_metadata *md;
+ bool truncate = false;
+ __be16 proto;
+ int tunnel_hlen;
+ int version;
+ int nhoff;
+
+ tun_info = skb_tunnel_info(skb);
+ if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
+ ip_tunnel_info_af(tun_info) != AF_INET))
+ goto err_free_skb;
+
+ key = &tun_info->key;
+ if (!test_bit(IP_TUNNEL_ERSPAN_OPT_BIT, tun_info->key.tun_flags))
+ goto err_free_skb;
+ if (tun_info->options_len < sizeof(*md))
+ goto err_free_skb;
+ md = ip_tunnel_info_opts(tun_info);
+
+ /* ERSPAN has fixed 8 byte GRE header */
+ version = md->version;
+ tunnel_hlen = 8 + erspan_hdr_len(version);
+
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto err_free_skb;
+
+ if (gre_handle_offloads(skb, false))
+ goto err_free_skb;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto err_free_skb;
+ truncate = true;
+ }
+
+ nhoff = skb_network_offset(skb);
+ if (skb->protocol == htons(ETH_P_IP) &&
+ (ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
+ truncate = true;
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_offset(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
+
+ if (version == 1) {
+ erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
+ ntohl(md->u.index), truncate, true);
+ proto = htons(ETH_P_ERSPAN);
+ } else if (version == 2) {
+ erspan_build_header_v2(skb,
+ ntohl(tunnel_id_to_key32(key->tun_id)),
+ md->u.md2.dir,
+ get_hwid(&md->u.md2),
+ truncate, true);
+ proto = htons(ETH_P_ERSPAN2);
+ } else {
+ goto err_free_skb;
+ }
+
+ __set_bit(IP_TUNNEL_SEQ_BIT, flags);
+ gre_build_header(skb, 8, flags, proto, 0,
+ htonl(atomic_fetch_inc(&tunnel->o_seqno)));
+
+ ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
+
+ return;
+
+err_free_skb:
+ kfree_skb(skb);
+ DEV_STATS_INC(dev, tx_dropped);
+}
+
+static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
+{
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+ const struct ip_tunnel_key *key;
+ struct rtable *rt;
+ struct flowi4 fl4;
+
+ if (ip_tunnel_info_af(info) != AF_INET)
+ return -EINVAL;
+
+ key = &info->key;
+ ip_tunnel_init_flow(&fl4, IPPROTO_GRE, key->u.ipv4.dst, key->u.ipv4.src,
+ tunnel_id_to_key32(key->tun_id),
+ key->tos & ~INET_ECN_MASK, dev_net(dev), 0,
+ skb->mark, skb_get_hash(skb), key->flow_flags);
+ rt = ip_route_output_key(dev_net(dev), &fl4);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+
+ ip_rt_put(rt);
+ info->key.u.ipv4.src = fl4.saddr;
+ return 0;
+}
+
static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
const struct iphdr *tnl_params;
- skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
- if (IS_ERR(skb))
- goto out;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb, dev, skb->protocol);
+ return NETDEV_TX_OK;
+ }
if (dev->header_ops) {
- /* Need space for new headers */
- if (skb_cow_head(skb, dev->needed_headroom -
- (tunnel->hlen + sizeof(struct iphdr))))
+ int pull_len = tunnel->hlen + sizeof(struct iphdr);
+
+ if (skb_cow_head(skb, 0))
+ goto free_skb;
+
+ if (!pskb_may_pull(skb, pull_len))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- /* Pull skb since ip_tunnel_xmit() needs skb->data pointing
- * to gre header.
- */
- skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
+ /* ip_tunnel_xmit() needs skb->data pointing to gre header. */
+ skb_pull(skb, pull_len);
+ skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
@@ -272,14 +685,71 @@ static netdev_tx_t ipgre_xmit(struct sk_buff *skb,
tnl_params = &tunnel->parms.iph;
}
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
+ goto free_skb;
+
__gre_xmit(skb, dev, tnl_params, skb->protocol);
+ return NETDEV_TX_OK;
+free_skb:
+ kfree_skb(skb);
+ DEV_STATS_INC(dev, tx_dropped);
+ return NETDEV_TX_OK;
+}
+
+static netdev_tx_t erspan_xmit(struct sk_buff *skb,
+ struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ bool truncate = false;
+ __be16 proto;
+
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
+ if (tunnel->collect_md) {
+ erspan_fb_xmit(skb, dev);
+ return NETDEV_TX_OK;
+ }
+
+ if (gre_handle_offloads(skb, false))
+ goto free_skb;
+
+ if (skb_cow_head(skb, dev->needed_headroom))
+ goto free_skb;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
+ goto free_skb;
+ truncate = true;
+ }
+
+ /* Push ERSPAN header */
+ if (tunnel->erspan_ver == 0) {
+ proto = htons(ETH_P_ERSPAN);
+ __clear_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags);
+ } else if (tunnel->erspan_ver == 1) {
+ erspan_build_header(skb, ntohl(tunnel->parms.o_key),
+ tunnel->index,
+ truncate, true);
+ proto = htons(ETH_P_ERSPAN);
+ } else if (tunnel->erspan_ver == 2) {
+ erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key),
+ tunnel->dir, tunnel->hwid,
+ truncate, true);
+ proto = htons(ETH_P_ERSPAN2);
+ } else {
+ goto free_skb;
+ }
+
+ __clear_bit(IP_TUNNEL_KEY_BIT, tunnel->parms.o_flags);
+ __gre_xmit(skb, dev, &tunnel->parms.iph, proto);
return NETDEV_TX_OK;
free_skb:
- dev_kfree_skb(skb);
-out:
- dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ DEV_STATS_INC(dev, tx_dropped);
return NETDEV_TX_OK;
}
@@ -288,50 +758,102 @@ static netdev_tx_t gre_tap_xmit(struct sk_buff *skb,
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- skb = gre_handle_offloads(skb, !!(tunnel->parms.o_flags&TUNNEL_CSUM));
- if (IS_ERR(skb))
- goto out;
+ if (!pskb_inet_may_pull(skb))
+ goto free_skb;
+
+ if (tunnel->collect_md) {
+ gre_fb_xmit(skb, dev, htons(ETH_P_TEB));
+ return NETDEV_TX_OK;
+ }
+
+ if (gre_handle_offloads(skb, test_bit(IP_TUNNEL_CSUM_BIT,
+ tunnel->parms.o_flags)))
+ goto free_skb;
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
__gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_TEB));
-
return NETDEV_TX_OK;
free_skb:
- dev_kfree_skb(skb);
-out:
- dev->stats.tx_dropped++;
+ kfree_skb(skb);
+ DEV_STATS_INC(dev, tx_dropped);
return NETDEV_TX_OK;
}
-static int ipgre_tunnel_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
+static void ipgre_link_update(struct net_device *dev, bool set_mtu)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+ int len;
+
+ len = tunnel->tun_hlen;
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
+ len = tunnel->tun_hlen - len;
+ tunnel->hlen = tunnel->hlen + len;
+
+ if (dev->header_ops)
+ dev->hard_header_len += len;
+ else
+ dev->needed_headroom += len;
+
+ if (set_mtu)
+ WRITE_ONCE(dev->mtu, max_t(int, dev->mtu - len, 68));
+
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags) ||
+ (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)) {
+ dev->features &= ~NETIF_F_GSO_SOFTWARE;
+ dev->hw_features &= ~NETIF_F_GSO_SOFTWARE;
+ } else {
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ }
+}
+
+static int ipgre_tunnel_ctl(struct net_device *dev,
+ struct ip_tunnel_parm_kern *p,
+ int cmd)
{
- int err = 0;
- struct ip_tunnel_parm p;
+ __be16 i_flags, o_flags;
+ int err;
+
+ if (!ip_tunnel_flags_is_be16_compat(p->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(p->o_flags))
+ return -EOVERFLOW;
+
+ i_flags = ip_tunnel_flags_to_be16(p->i_flags);
+ o_flags = ip_tunnel_flags_to_be16(p->o_flags);
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
- return -EFAULT;
if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
- if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
- p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) ||
- ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)))
+ if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
+ p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
+ ((i_flags | o_flags) & (GRE_VERSION | GRE_ROUTING)))
return -EINVAL;
}
- p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
- p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
- err = ip_tunnel_ioctl(dev, &p, cmd);
+ gre_flags_to_tnl_flags(p->i_flags, i_flags);
+ gre_flags_to_tnl_flags(p->o_flags, o_flags);
+
+ err = ip_tunnel_ctl(dev, p, cmd);
if (err)
return err;
- p.i_flags = tnl_flags_to_gre_flags(p.i_flags);
- p.o_flags = tnl_flags_to_gre_flags(p.o_flags);
+ if (cmd == SIOCCHGTUNNEL) {
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ ip_tunnel_flags_copy(t->parms.i_flags, p->i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p->o_flags);
+
+ if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
+ ipgre_link_update(dev, true);
+ }
+
+ i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+ ip_tunnel_flags_from_be16(p->i_flags, i_flags);
+ o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
+ ip_tunnel_flags_from_be16(p->o_flags, o_flags);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
- return -EFAULT;
return 0;
}
@@ -370,9 +892,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
struct iphdr *iph;
struct gre_base_hdr *greh;
- iph = (struct iphdr *)skb_push(skb, t->hlen + sizeof(*iph));
+ iph = skb_push(skb, t->hlen + sizeof(*iph));
greh = (struct gre_base_hdr *)(iph+1);
- greh->flags = tnl_flags_to_gre_flags(t->parms.o_flags);
+ greh->flags = gre_tnl_flags_to_gre_flags(t->parms.o_flags);
greh->protocol = htons(type);
memcpy(iph, &t->parms.iph, sizeof(struct iphdr));
@@ -383,7 +905,7 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
if (daddr)
memcpy(&iph->daddr, daddr, 4);
if (iph->daddr)
- return t->hlen;
+ return t->hlen + sizeof(*iph);
return -(t->hlen + sizeof(*iph));
}
@@ -406,20 +928,23 @@ static int ipgre_open(struct net_device *dev)
struct ip_tunnel *t = netdev_priv(dev);
if (ipv4_is_multicast(t->parms.iph.daddr)) {
- struct flowi4 fl4;
+ struct flowi4 fl4 = {
+ .flowi4_oif = t->parms.link,
+ .flowi4_dscp = ip4h_dscp(&t->parms.iph),
+ .flowi4_scope = RT_SCOPE_UNIVERSE,
+ .flowi4_proto = IPPROTO_GRE,
+ .saddr = t->parms.iph.saddr,
+ .daddr = t->parms.iph.daddr,
+ .fl4_gre_key = t->parms.o_key,
+ };
struct rtable *rt;
- rt = ip_route_output_gre(dev_net(dev), &fl4,
- t->parms.iph.daddr,
- t->parms.iph.saddr,
- t->parms.o_key,
- RT_TOS(t->parms.iph.tos),
- t->parms.link);
+ rt = ip_route_output_key(t->net, &fl4);
if (IS_ERR(rt))
return -EADDRNOTAVAIL;
dev = rt->dst.dev;
ip_rt_put(rt);
- if (__in_dev_get_rtnl(dev) == NULL)
+ if (!__in_dev_get_rtnl(dev))
return -EADDRNOTAVAIL;
t->mlink = dev->ifindex;
ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
@@ -433,7 +958,7 @@ static int ipgre_close(struct net_device *dev)
if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev;
- in_dev = inetdev_by_index(dev_net(dev), t->mlink);
+ in_dev = inetdev_by_index(t->net, t->mlink);
if (in_dev)
ip_mc_dec_group(in_dev, t->parms.iph.daddr);
}
@@ -449,9 +974,11 @@ static const struct net_device_ops ipgre_netdev_ops = {
.ndo_stop = ipgre_close,
#endif
.ndo_start_xmit = ipgre_xmit,
- .ndo_do_ioctl = ipgre_tunnel_ioctl,
+ .ndo_siocdevprivate = ip_tunnel_siocdevprivate,
.ndo_change_mtu = ip_tunnel_change_mtu,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
+ .ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_tunnel_ctl = ipgre_tunnel_ctl,
};
#define GRE_FEATURES (NETIF_F_SG | \
@@ -462,6 +989,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
static void ipgre_tunnel_setup(struct net_device *dev)
{
dev->netdev_ops = &ipgre_netdev_ops;
+ dev->type = ARPHRD_IPGRE;
ip_tunnel_setup(dev, ipgre_net_id);
}
@@ -470,24 +998,28 @@ static void __gre_tunnel_init(struct net_device *dev)
struct ip_tunnel *tunnel;
tunnel = netdev_priv(dev);
- tunnel->hlen = ip_gre_calc_hlen(tunnel->parms.o_flags);
+ tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags);
tunnel->parms.iph.protocol = IPPROTO_GRE;
- dev->needed_headroom = LL_MAX_HEADER + sizeof(struct iphdr) + 4;
- dev->mtu = ETH_DATA_LEN - sizeof(struct iphdr) - 4;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen;
+ dev->needed_headroom = tunnel->hlen + sizeof(tunnel->parms.iph);
- dev->features |= NETIF_F_NETNS_LOCAL | GRE_FEATURES;
+ dev->features |= GRE_FEATURES;
dev->hw_features |= GRE_FEATURES;
- if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) {
- /* TCP offload with GRE SEQ is not supported. */
- dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_GSO_SOFTWARE;
- /* Can use a lockless transmit, unless we generate
- * output sequences
- */
- dev->features |= NETIF_F_LLTX;
- }
+ /* TCP offload with GRE SEQ is not supported, nor can we support 2
+ * levels of outer headers requiring an update.
+ */
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.o_flags))
+ return;
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.o_flags) &&
+ tunnel->encap.type != TUNNEL_ENCAP_NONE)
+ return;
+
+ dev->features |= NETIF_F_GSO_SOFTWARE;
+ dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+
+ dev->lltx = true;
}
static int ipgre_tunnel_init(struct net_device *dev)
@@ -497,33 +1029,36 @@ static int ipgre_tunnel_init(struct net_device *dev)
__gre_tunnel_init(dev);
- memcpy(dev->dev_addr, &iph->saddr, 4);
+ __dev_addr_set(dev, &iph->saddr, 4);
memcpy(dev->broadcast, &iph->daddr, 4);
- dev->type = ARPHRD_IPGRE;
dev->flags = IFF_NOARP;
- dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;
+ netif_keep_dst(dev);
dev->addr_len = 4;
- if (iph->daddr) {
+ if (iph->daddr && !tunnel->collect_md) {
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
if (!iph->saddr)
return -EINVAL;
dev->flags = IFF_BROADCAST;
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
}
#endif
- } else
+ } else if (!tunnel->collect_md) {
dev->header_ops = &ipgre_header_ops;
+ dev->hard_header_len = tunnel->hlen + sizeof(*iph);
+ dev->needed_headroom = 0;
+ }
return ip_tunnel_init(dev);
}
-static struct gre_cisco_protocol ipgre_protocol = {
- .handler = ipgre_rcv,
- .err_handler = ipgre_err,
- .priority = 0,
+static const struct gre_protocol ipgre_protocol = {
+ .handler = gre_rcv,
+ .err_handler = gre_err,
};
static int __net_init ipgre_init_net(struct net *net)
@@ -531,20 +1066,21 @@ static int __net_init ipgre_init_net(struct net *net)
return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL);
}
-static void __net_exit ipgre_exit_net(struct net *net)
+static void __net_exit ipgre_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id);
- ip_tunnel_delete_net(itn);
+ ip_tunnel_delete_net(net, ipgre_net_id, &ipgre_link_ops, dev_to_kill);
}
static struct pernet_operations ipgre_net_ops = {
.init = ipgre_init_net,
- .exit = ipgre_exit_net,
+ .exit_rtnl = ipgre_exit_rtnl,
.id = &ipgre_net_id,
.size = sizeof(struct ip_tunnel_net),
};
-static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
+static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
__be16 flags;
@@ -559,10 +1095,16 @@ static int ipgre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[])
if (flags & (GRE_VERSION|GRE_ROUTING))
return -EINVAL;
+ if (data[IFLA_GRE_COLLECT_METADATA] &&
+ data[IFLA_GRE_ENCAP_TYPE] &&
+ nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]) != TUNNEL_ENCAP_NONE)
+ return -EINVAL;
+
return 0;
}
-static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
+static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
__be32 daddr;
@@ -583,27 +1125,74 @@ static int ipgre_tap_validate(struct nlattr *tb[], struct nlattr *data[])
}
out:
- return ipgre_tunnel_validate(tb, data);
+ return ipgre_tunnel_validate(tb, data, extack);
+}
+
+static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ __be16 flags = 0;
+ int ret;
+
+ if (!data)
+ return 0;
+
+ ret = ipgre_tap_validate(tb, data, extack);
+ if (ret)
+ return ret;
+
+ if (data[IFLA_GRE_ERSPAN_VER] &&
+ nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
+ return 0;
+
+ /* ERSPAN type II/III should only have GRE sequence and key flag */
+ if (data[IFLA_GRE_OFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
+ if (data[IFLA_GRE_IFLAGS])
+ flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]);
+ if (!data[IFLA_GRE_COLLECT_METADATA] &&
+ flags != (GRE_SEQ | GRE_KEY))
+ return -EINVAL;
+
+ /* ERSPAN Session ID only has 10-bit. Since we reuse
+ * 32-bit key field as ID, check it's range.
+ */
+ if (data[IFLA_GRE_IKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ if (data[IFLA_GRE_OKEY] &&
+ (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK))
+ return -EINVAL;
+
+ return 0;
}
-static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
- struct ip_tunnel_parm *parms)
+static int ipgre_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm_kern *parms,
+ __u32 *fwmark)
{
+ struct ip_tunnel *t = netdev_priv(dev);
+
memset(parms, 0, sizeof(*parms));
parms->iph.protocol = IPPROTO_GRE;
if (!data)
- return;
+ return 0;
if (data[IFLA_GRE_LINK])
parms->link = nla_get_u32(data[IFLA_GRE_LINK]);
if (data[IFLA_GRE_IFLAGS])
- parms->i_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_IFLAGS]));
+ gre_flags_to_tnl_flags(parms->i_flags,
+ nla_get_be16(data[IFLA_GRE_IFLAGS]));
if (data[IFLA_GRE_OFLAGS])
- parms->o_flags = gre_flags_to_tnl_flags(nla_get_be16(data[IFLA_GRE_OFLAGS]));
+ gre_flags_to_tnl_flags(parms->o_flags,
+ nla_get_be16(data[IFLA_GRE_OFLAGS]));
if (data[IFLA_GRE_IKEY])
parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]);
@@ -612,10 +1201,10 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]);
if (data[IFLA_GRE_LOCAL])
- parms->iph.saddr = nla_get_be32(data[IFLA_GRE_LOCAL]);
+ parms->iph.saddr = nla_get_in_addr(data[IFLA_GRE_LOCAL]);
if (data[IFLA_GRE_REMOTE])
- parms->iph.daddr = nla_get_be32(data[IFLA_GRE_REMOTE]);
+ parms->iph.daddr = nla_get_in_addr(data[IFLA_GRE_REMOTE]);
if (data[IFLA_GRE_TTL])
parms->iph.ttl = nla_get_u8(data[IFLA_GRE_TTL]);
@@ -623,13 +1212,114 @@ static void ipgre_netlink_parms(struct nlattr *data[], struct nlattr *tb[],
if (data[IFLA_GRE_TOS])
parms->iph.tos = nla_get_u8(data[IFLA_GRE_TOS]);
- if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC]))
+ if (!data[IFLA_GRE_PMTUDISC] || nla_get_u8(data[IFLA_GRE_PMTUDISC])) {
+ if (t->ignore_df)
+ return -EINVAL;
parms->iph.frag_off = htons(IP_DF);
+ }
+
+ if (data[IFLA_GRE_COLLECT_METADATA]) {
+ t->collect_md = true;
+ if (dev->type == ARPHRD_IPGRE)
+ dev->type = ARPHRD_NONE;
+ }
+
+ if (data[IFLA_GRE_IGNORE_DF]) {
+ if (nla_get_u8(data[IFLA_GRE_IGNORE_DF])
+ && (parms->iph.frag_off & htons(IP_DF)))
+ return -EINVAL;
+ t->ignore_df = !!nla_get_u8(data[IFLA_GRE_IGNORE_DF]);
+ }
+
+ if (data[IFLA_GRE_FWMARK])
+ *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]);
+
+ return 0;
+}
+
+static int erspan_netlink_parms(struct net_device *dev,
+ struct nlattr *data[],
+ struct nlattr *tb[],
+ struct ip_tunnel_parm_kern *parms,
+ __u32 *fwmark)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err;
+
+ err = ipgre_netlink_parms(dev, data, tb, parms, fwmark);
+ if (err)
+ return err;
+ if (!data)
+ return 0;
+
+ if (data[IFLA_GRE_ERSPAN_VER]) {
+ t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
+
+ if (t->erspan_ver > 2)
+ return -EINVAL;
+ }
+
+ if (t->erspan_ver == 1) {
+ if (data[IFLA_GRE_ERSPAN_INDEX]) {
+ t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]);
+ if (t->index & ~INDEX_MASK)
+ return -EINVAL;
+ }
+ } else if (t->erspan_ver == 2) {
+ if (data[IFLA_GRE_ERSPAN_DIR]) {
+ t->dir = nla_get_u8(data[IFLA_GRE_ERSPAN_DIR]);
+ if (t->dir & ~(DIR_MASK >> DIR_OFFSET))
+ return -EINVAL;
+ }
+ if (data[IFLA_GRE_ERSPAN_HWID]) {
+ t->hwid = nla_get_u16(data[IFLA_GRE_ERSPAN_HWID]);
+ if (t->hwid & ~(HWID_MASK >> HWID_OFFSET))
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+/* This function returns true when ENCAP attributes are present in the nl msg */
+static bool ipgre_netlink_encap_parms(struct nlattr *data[],
+ struct ip_tunnel_encap *ipencap)
+{
+ bool ret = false;
+
+ memset(ipencap, 0, sizeof(*ipencap));
+
+ if (!data)
+ return ret;
+
+ if (data[IFLA_GRE_ENCAP_TYPE]) {
+ ret = true;
+ ipencap->type = nla_get_u16(data[IFLA_GRE_ENCAP_TYPE]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_FLAGS]) {
+ ret = true;
+ ipencap->flags = nla_get_u16(data[IFLA_GRE_ENCAP_FLAGS]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_SPORT]) {
+ ret = true;
+ ipencap->sport = nla_get_be16(data[IFLA_GRE_ENCAP_SPORT]);
+ }
+
+ if (data[IFLA_GRE_ENCAP_DPORT]) {
+ ret = true;
+ ipencap->dport = nla_get_be16(data[IFLA_GRE_ENCAP_DPORT]);
+ }
+
+ return ret;
}
static int gre_tap_init(struct net_device *dev)
{
__gre_tunnel_init(dev);
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
return ip_tunnel_init(dev);
}
@@ -641,32 +1331,166 @@ static const struct net_device_ops gre_tap_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_validate_addr = eth_validate_addr,
.ndo_change_mtu = ip_tunnel_change_mtu,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
+ .ndo_get_stats64 = dev_get_tstats64,
+ .ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
+};
+
+static int erspan_tunnel_init(struct net_device *dev)
+{
+ struct ip_tunnel *tunnel = netdev_priv(dev);
+
+ if (tunnel->erspan_ver == 0)
+ tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
+ else
+ tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
+
+ tunnel->parms.iph.protocol = IPPROTO_GRE;
+ tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
+ erspan_hdr_len(tunnel->erspan_ver);
+
+ dev->features |= GRE_FEATURES;
+ dev->hw_features |= GRE_FEATURES;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ netif_keep_dst(dev);
+
+ return ip_tunnel_init(dev);
+}
+
+static const struct net_device_ops erspan_netdev_ops = {
+ .ndo_init = erspan_tunnel_init,
+ .ndo_uninit = ip_tunnel_uninit,
+ .ndo_start_xmit = erspan_xmit,
+ .ndo_set_mac_address = eth_mac_addr,
+ .ndo_validate_addr = eth_validate_addr,
+ .ndo_change_mtu = ip_tunnel_change_mtu,
+ .ndo_get_stats64 = dev_get_tstats64,
+ .ndo_get_iflink = ip_tunnel_get_iflink,
+ .ndo_fill_metadata_dst = gre_fill_metadata_dst,
};
static void ipgre_tap_setup(struct net_device *dev)
{
ether_setup(dev);
- dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->max_mtu = 0;
+ dev->netdev_ops = &gre_tap_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
ip_tunnel_setup(dev, gre_tap_net_id);
}
-static int ipgre_newlink(struct net *src_net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[])
+static int
+ipgre_newlink_encap_setup(struct net_device *dev, struct nlattr *data[])
+{
+ struct ip_tunnel_encap ipencap;
+
+ if (ipgre_netlink_encap_parms(data, &ipencap)) {
+ struct ip_tunnel *t = netdev_priv(dev);
+ int err = ip_tunnel_encap_setup(t, &ipencap);
+
+ if (err < 0)
+ return err;
+ }
+
+ return 0;
+}
+
+static int ipgre_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
+ struct ip_tunnel_parm_kern p;
+ __u32 fwmark = 0;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err < 0)
+ return err;
+ return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
+ fwmark);
+}
+
+static int erspan_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
+ struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
+ struct ip_tunnel_parm_kern p;
+ __u32 fwmark = 0;
+ int err;
- ipgre_netlink_parms(data, tb, &p);
- return ip_tunnel_newlink(dev, tb, &p);
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err)
+ return err;
+ return ip_tunnel_newlink(params->link_net ? : dev_net(dev), dev, tb, &p,
+ fwmark);
}
static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[],
- struct nlattr *data[])
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
{
- struct ip_tunnel_parm p;
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
+ __u32 fwmark = t->fwmark;
+ int err;
+
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err < 0)
+ return err;
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
+
+ ipgre_link_update(dev, !tb[IFLA_MTU]);
+
+ return 0;
+}
+
+static int erspan_changelink(struct net_device *dev, struct nlattr *tb[],
+ struct nlattr *data[],
+ struct netlink_ext_ack *extack)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+ struct ip_tunnel_parm_kern p;
+ __u32 fwmark = t->fwmark;
+ int err;
- ipgre_netlink_parms(data, tb, &p);
- return ip_tunnel_changelink(dev, tb, &p);
+ err = ipgre_newlink_encap_setup(dev, data);
+ if (err)
+ return err;
+
+ err = erspan_netlink_parms(dev, data, tb, &p, &fwmark);
+ if (err < 0)
+ return err;
+
+ err = ip_tunnel_changelink(dev, tb, &p, fwmark);
+ if (err < 0)
+ return err;
+
+ ip_tunnel_flags_copy(t->parms.i_flags, p.i_flags);
+ ip_tunnel_flags_copy(t->parms.o_flags, p.o_flags);
+
+ return 0;
}
static size_t ipgre_get_size(const struct net_device *dev)
@@ -692,43 +1516,142 @@ static size_t ipgre_get_size(const struct net_device *dev)
nla_total_size(1) +
/* IFLA_GRE_PMTUDISC */
nla_total_size(1) +
+ /* IFLA_GRE_ENCAP_TYPE */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_FLAGS */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_SPORT */
+ nla_total_size(2) +
+ /* IFLA_GRE_ENCAP_DPORT */
+ nla_total_size(2) +
+ /* IFLA_GRE_COLLECT_METADATA */
+ nla_total_size(0) +
+ /* IFLA_GRE_IGNORE_DF */
+ nla_total_size(1) +
+ /* IFLA_GRE_FWMARK */
+ nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_INDEX */
+ nla_total_size(4) +
+ /* IFLA_GRE_ERSPAN_VER */
+ nla_total_size(1) +
+ /* IFLA_GRE_ERSPAN_DIR */
+ nla_total_size(1) +
+ /* IFLA_GRE_ERSPAN_HWID */
+ nla_total_size(2) +
0;
}
static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct ip_tunnel *t = netdev_priv(dev);
- struct ip_tunnel_parm *p = &t->parms;
+ struct ip_tunnel_parm_kern *p = &t->parms;
+ IP_TUNNEL_DECLARE_FLAGS(o_flags);
+
+ ip_tunnel_flags_copy(o_flags, p->o_flags);
if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) ||
- nla_put_be16(skb, IFLA_GRE_IFLAGS, tnl_flags_to_gre_flags(p->i_flags)) ||
- nla_put_be16(skb, IFLA_GRE_OFLAGS, tnl_flags_to_gre_flags(p->o_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_IFLAGS,
+ gre_tnl_flags_to_gre_flags(p->i_flags)) ||
+ nla_put_be16(skb, IFLA_GRE_OFLAGS,
+ gre_tnl_flags_to_gre_flags(o_flags)) ||
nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) ||
nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) ||
- nla_put_be32(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
- nla_put_be32(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
+ nla_put_in_addr(skb, IFLA_GRE_LOCAL, p->iph.saddr) ||
+ nla_put_in_addr(skb, IFLA_GRE_REMOTE, p->iph.daddr) ||
nla_put_u8(skb, IFLA_GRE_TTL, p->iph.ttl) ||
nla_put_u8(skb, IFLA_GRE_TOS, p->iph.tos) ||
nla_put_u8(skb, IFLA_GRE_PMTUDISC,
- !!(p->iph.frag_off & htons(IP_DF))))
+ !!(p->iph.frag_off & htons(IP_DF))) ||
+ nla_put_u32(skb, IFLA_GRE_FWMARK, t->fwmark))
goto nla_put_failure;
+
+ if (nla_put_u16(skb, IFLA_GRE_ENCAP_TYPE,
+ t->encap.type) ||
+ nla_put_be16(skb, IFLA_GRE_ENCAP_SPORT,
+ t->encap.sport) ||
+ nla_put_be16(skb, IFLA_GRE_ENCAP_DPORT,
+ t->encap.dport) ||
+ nla_put_u16(skb, IFLA_GRE_ENCAP_FLAGS,
+ t->encap.flags))
+ goto nla_put_failure;
+
+ if (nla_put_u8(skb, IFLA_GRE_IGNORE_DF, t->ignore_df))
+ goto nla_put_failure;
+
+ if (t->collect_md) {
+ if (nla_put_flag(skb, IFLA_GRE_COLLECT_METADATA))
+ goto nla_put_failure;
+ }
+
return 0;
nla_put_failure:
return -EMSGSIZE;
}
+static int erspan_fill_info(struct sk_buff *skb, const struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ if (t->erspan_ver <= 2) {
+ if (t->erspan_ver != 0 && !t->collect_md)
+ __set_bit(IP_TUNNEL_KEY_BIT, t->parms.o_flags);
+
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
+ goto nla_put_failure;
+
+ if (t->erspan_ver == 1) {
+ if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
+ goto nla_put_failure;
+ } else if (t->erspan_ver == 2) {
+ if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
+ goto nla_put_failure;
+ if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
+ goto nla_put_failure;
+ }
+ }
+
+ return ipgre_fill_info(skb, dev);
+
+nla_put_failure:
+ return -EMSGSIZE;
+}
+
+static void erspan_setup(struct net_device *dev)
+{
+ struct ip_tunnel *t = netdev_priv(dev);
+
+ ether_setup(dev);
+ dev->max_mtu = 0;
+ dev->netdev_ops = &erspan_netdev_ops;
+ dev->priv_flags &= ~IFF_TX_SKB_SHARING;
+ dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
+ ip_tunnel_setup(dev, erspan_net_id);
+ t->erspan_ver = 1;
+}
+
static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = {
[IFLA_GRE_LINK] = { .type = NLA_U32 },
[IFLA_GRE_IFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_OFLAGS] = { .type = NLA_U16 },
[IFLA_GRE_IKEY] = { .type = NLA_U32 },
[IFLA_GRE_OKEY] = { .type = NLA_U32 },
- [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct iphdr, saddr) },
- [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GRE_LOCAL] = { .len = sizeof_field(struct iphdr, saddr) },
+ [IFLA_GRE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GRE_TTL] = { .type = NLA_U8 },
[IFLA_GRE_TOS] = { .type = NLA_U8 },
[IFLA_GRE_PMTUDISC] = { .type = NLA_U8 },
+ [IFLA_GRE_ENCAP_TYPE] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_FLAGS] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_SPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_ENCAP_DPORT] = { .type = NLA_U16 },
+ [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG },
+ [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 },
+ [IFLA_GRE_FWMARK] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 },
+ [IFLA_GRE_ERSPAN_VER] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_DIR] = { .type = NLA_U8 },
+ [IFLA_GRE_ERSPAN_HWID] = { .type = NLA_U16 },
};
static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
@@ -743,6 +1666,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = {
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
.fill_info = ipgre_fill_info,
+ .get_link_net = ip_tunnel_get_link_net,
};
static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
@@ -757,26 +1681,108 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = {
.dellink = ip_tunnel_dellink,
.get_size = ipgre_get_size,
.fill_info = ipgre_fill_info,
+ .get_link_net = ip_tunnel_get_link_net,
+};
+
+static struct rtnl_link_ops erspan_link_ops __read_mostly = {
+ .kind = "erspan",
+ .maxtype = IFLA_GRE_MAX,
+ .policy = ipgre_policy,
+ .priv_size = sizeof(struct ip_tunnel),
+ .setup = erspan_setup,
+ .validate = erspan_validate,
+ .newlink = erspan_newlink,
+ .changelink = erspan_changelink,
+ .dellink = ip_tunnel_dellink,
+ .get_size = ipgre_get_size,
+ .fill_info = erspan_fill_info,
+ .get_link_net = ip_tunnel_get_link_net,
};
+struct net_device *gretap_fb_dev_create(struct net *net, const char *name,
+ u8 name_assign_type)
+{
+ struct rtnl_newlink_params params = { .src_net = net };
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct net_device *dev;
+ LIST_HEAD(list_kill);
+ struct ip_tunnel *t;
+ int err;
+
+ memset(&tb, 0, sizeof(tb));
+ params.tb = tb;
+
+ dev = rtnl_create_link(net, name, name_assign_type,
+ &ipgre_tap_ops, tb, NULL);
+ if (IS_ERR(dev))
+ return dev;
+
+ /* Configure flow based GRE device. */
+ t = netdev_priv(dev);
+ t->collect_md = true;
+
+ err = ipgre_newlink(dev, &params, NULL);
+ if (err < 0) {
+ free_netdev(dev);
+ return ERR_PTR(err);
+ }
+
+ /* openvswitch users expect packet sizes to be unrestricted,
+ * so set the largest MTU we can.
+ */
+ err = __ip_tunnel_change_mtu(dev, IP_MAX_MTU, false);
+ if (err)
+ goto out;
+
+ err = rtnl_configure_link(dev, NULL, 0, NULL);
+ if (err < 0)
+ goto out;
+
+ return dev;
+out:
+ ip_tunnel_dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ return ERR_PTR(err);
+}
+EXPORT_SYMBOL_GPL(gretap_fb_dev_create);
+
static int __net_init ipgre_tap_init_net(struct net *net)
{
- return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, NULL);
+ return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0");
}
-static void __net_exit ipgre_tap_exit_net(struct net *net)
+static void __net_exit ipgre_tap_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
{
- struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id);
- ip_tunnel_delete_net(itn);
+ ip_tunnel_delete_net(net, gre_tap_net_id, &ipgre_tap_ops, dev_to_kill);
}
static struct pernet_operations ipgre_tap_net_ops = {
.init = ipgre_tap_init_net,
- .exit = ipgre_tap_exit_net,
+ .exit_rtnl = ipgre_tap_exit_rtnl,
.id = &gre_tap_net_id,
.size = sizeof(struct ip_tunnel_net),
};
+static int __net_init erspan_init_net(struct net *net)
+{
+ return ip_tunnel_init_net(net, erspan_net_id,
+ &erspan_link_ops, "erspan0");
+}
+
+static void __net_exit erspan_exit_rtnl(struct net *net,
+ struct list_head *dev_to_kill)
+{
+ ip_tunnel_delete_net(net, erspan_net_id, &erspan_link_ops, dev_to_kill);
+}
+
+static struct pernet_operations erspan_net_ops = {
+ .init = erspan_init_net,
+ .exit_rtnl = erspan_exit_rtnl,
+ .id = &erspan_net_id,
+ .size = sizeof(struct ip_tunnel_net),
+};
+
static int __init ipgre_init(void)
{
int err;
@@ -789,9 +1795,13 @@ static int __init ipgre_init(void)
err = register_pernet_device(&ipgre_tap_net_ops);
if (err < 0)
- goto pnet_tap_faied;
+ goto pnet_tap_failed;
+
+ err = register_pernet_device(&erspan_net_ops);
+ if (err < 0)
+ goto pnet_erspan_failed;
- err = gre_cisco_register(&ipgre_protocol);
+ err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO);
if (err < 0) {
pr_info("%s: can't add protocol\n", __func__);
goto add_proto_failed;
@@ -805,15 +1815,23 @@ static int __init ipgre_init(void)
if (err < 0)
goto tap_ops_failed;
+ err = rtnl_link_register(&erspan_link_ops);
+ if (err < 0)
+ goto erspan_link_failed;
+
return 0;
+erspan_link_failed:
+ rtnl_link_unregister(&ipgre_tap_ops);
tap_ops_failed:
rtnl_link_unregister(&ipgre_link_ops);
rtnl_link_failed:
- gre_cisco_unregister(&ipgre_protocol);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
add_proto_failed:
+ unregister_pernet_device(&erspan_net_ops);
+pnet_erspan_failed:
unregister_pernet_device(&ipgre_tap_net_ops);
-pnet_tap_faied:
+pnet_tap_failed:
unregister_pernet_device(&ipgre_net_ops);
return err;
}
@@ -822,15 +1840,20 @@ static void __exit ipgre_fini(void)
{
rtnl_link_unregister(&ipgre_tap_ops);
rtnl_link_unregister(&ipgre_link_ops);
- gre_cisco_unregister(&ipgre_protocol);
+ rtnl_link_unregister(&erspan_link_ops);
+ gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO);
unregister_pernet_device(&ipgre_tap_net_ops);
unregister_pernet_device(&ipgre_net_ops);
+ unregister_pernet_device(&erspan_net_ops);
}
module_init(ipgre_init);
module_exit(ipgre_fini);
+MODULE_DESCRIPTION("IPv4 GRE tunnels over IP library");
MODULE_LICENSE("GPL");
MODULE_ALIAS_RTNL_LINK("gre");
MODULE_ALIAS_RTNL_LINK("gretap");
+MODULE_ALIAS_RTNL_LINK("erspan");
MODULE_ALIAS_NETDEV("gre0");
MODULE_ALIAS_NETDEV("gretap0");
+MODULE_ALIAS_NETDEV("erspan0");