summaryrefslogtreecommitdiff
path: root/net/ipv4/ip_tunnel.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/ip_tunnel.c')
-rw-r--r--net/ipv4/ip_tunnel.c516
1 files changed, 313 insertions, 203 deletions
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index c4f5602308ed..158a30ae7c5f 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -1,19 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013 Nicira, Inc.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
- * 02110-1301, USA
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -53,9 +40,11 @@
#include <net/xfrm.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
+#include <net/netdev_lock.h>
#include <net/rtnetlink.h>
#include <net/udp.h>
#include <net/dst_metadata.h>
+#include <net/inet_dscp.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
@@ -69,17 +58,13 @@ static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)
IP_TNL_HASH_BITS);
}
-static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
- __be16 flags, __be32 key)
+static bool ip_tunnel_key_match(const struct ip_tunnel_parm_kern *p,
+ const unsigned long *flags, __be32 key)
{
- if (p->i_flags & TUNNEL_KEY) {
- if (flags & TUNNEL_KEY)
- return key == p->i_key;
- else
- /* key expected, none present */
- return false;
- } else
- return !(flags & TUNNEL_KEY);
+ if (!test_bit(IP_TUNNEL_KEY_BIT, flags))
+ return !test_bit(IP_TUNNEL_KEY_BIT, p->i_flags);
+
+ return test_bit(IP_TUNNEL_KEY_BIT, p->i_flags) && p->i_key == key;
}
/* Fallback tunnel: no source, no destination, no key, no options
@@ -94,13 +79,14 @@ static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
Given src, dst and key, find appropriate for input tunnel.
*/
struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
- int link, __be16 flags,
+ int link, const unsigned long *flags,
__be32 remote, __be32 local,
__be32 key)
{
- unsigned int hash;
struct ip_tunnel *t, *cand = NULL;
struct hlist_head *head;
+ struct net_device *ndev;
+ unsigned int hash;
hash = ip_tunnel_hash(key, remote);
head = &itn->tunnels[hash];
@@ -114,10 +100,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else
- cand = t;
+ cand = t;
}
hlist_for_each_entry_rcu(t, head, hash_node) {
@@ -129,9 +114,9 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
@@ -149,29 +134,26 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
if (!ip_tunnel_key_match(&t->parms, flags, key))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
- if (flags & TUNNEL_NO_KEY)
- goto skip_key_lookup;
-
hlist_for_each_entry_rcu(t, head, hash_node) {
- if (t->parms.i_key != key ||
+ if ((!test_bit(IP_TUNNEL_NO_KEY_BIT, flags) &&
+ t->parms.i_key != key) ||
t->parms.iph.saddr != 0 ||
t->parms.iph.daddr != 0 ||
!(t->dev->flags & IFF_UP))
continue;
- if (t->parms.link == link)
+ if (READ_ONCE(t->parms.link) == link)
return t;
- else if (!cand)
+ if (!cand)
cand = t;
}
-skip_key_lookup:
if (cand)
return cand;
@@ -179,15 +161,16 @@ skip_key_lookup:
if (t && t->dev->flags & IFF_UP)
return t;
- if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
- return netdev_priv(itn->fb_tunnel_dev);
+ ndev = READ_ONCE(itn->fb_tunnel_dev);
+ if (ndev && ndev->flags & IFF_UP)
+ return netdev_priv(ndev);
return NULL;
}
EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
unsigned int h;
__be32 remote;
@@ -198,7 +181,8 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
else
remote = 0;
- if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, parms->i_flags) &&
+ test_bit(IP_TUNNEL_VTI_BIT, parms->i_flags))
i_key = 0;
h = ip_tunnel_hash(i_key, remote);
@@ -222,21 +206,23 @@ static void ip_tunnel_del(struct ip_tunnel_net *itn, struct ip_tunnel *t)
}
static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms,
+ struct ip_tunnel_parm_kern *parms,
int type)
{
__be32 remote = parms->iph.daddr;
__be32 local = parms->iph.saddr;
+ IP_TUNNEL_DECLARE_FLAGS(flags);
__be32 key = parms->i_key;
- __be16 flags = parms->i_flags;
int link = parms->link;
struct ip_tunnel *t = NULL;
struct hlist_head *head = ip_bucket(itn, parms);
- hlist_for_each_entry_rcu(t, head, hash_node) {
+ ip_tunnel_flags_copy(flags, parms->i_flags);
+
+ hlist_for_each_entry_rcu(t, head, hash_node, lockdep_rtnl_is_held()) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
- link == t->parms.link &&
+ link == READ_ONCE(t->parms.link) &&
type == t->dev->type &&
ip_tunnel_key_match(&t->parms, flags, key))
break;
@@ -246,7 +232,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
static struct net_device *__ip_tunnel_create(struct net *net,
const struct rtnl_link_ops *ops,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
int err;
struct ip_tunnel *tunnel;
@@ -257,11 +243,11 @@ static struct net_device *__ip_tunnel_create(struct net *net,
if (parms->name[0]) {
if (!dev_valid_name(parms->name))
goto failed;
- strlcpy(name, parms->name, IFNAMSIZ);
+ strscpy(name, parms->name);
} else {
if (strlen(ops->kind) > (IFNAMSIZ - 3))
goto failed;
- strcpy(name, ops->kind);
+ strscpy(name, ops->kind);
strcat(name, "%d");
}
@@ -309,8 +295,8 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
ip_tunnel_init_flow(&fl4, iph->protocol, iph->daddr,
iph->saddr, tunnel->parms.o_key,
- RT_TOS(iph->tos), tunnel->parms.link,
- tunnel->fwmark);
+ iph->tos & INET_DSCP_MASK, tunnel->net,
+ tunnel->parms.link, tunnel->fwmark, 0, 0);
rt = ip_route_output_key(tunnel->net, &fl4);
if (!IS_ERR(rt)) {
@@ -332,7 +318,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
}
dev->needed_headroom = t_hlen + hlen;
- mtu -= (dev->hard_header_len + t_hlen);
+ mtu -= t_hlen + (dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0);
if (mtu < IPV4_MIN_MTU)
mtu = IPV4_MIN_MTU;
@@ -342,7 +328,7 @@ static int ip_tunnel_bind_dev(struct net_device *dev)
static struct ip_tunnel *ip_tunnel_create(struct net *net,
struct ip_tunnel_net *itn,
- struct ip_tunnel_parm *parms)
+ struct ip_tunnel_parm_kern *parms)
{
struct ip_tunnel *nt;
struct net_device *dev;
@@ -362,7 +348,10 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,
nt = netdev_priv(dev);
t_hlen = nt->hlen + sizeof(struct iphdr);
dev->min_mtu = ETH_MIN_MTU;
- dev->max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
+ dev->max_mtu = IP_MAX_MTU - t_hlen;
+ if (dev->type == ARPHRD_ETHER)
+ dev->max_mtu -= dev->hard_header_len;
+
ip_tunnel_add(itn, nt);
return nt;
@@ -371,39 +360,65 @@ err_dev_set_mtu:
return ERR_PTR(err);
}
+void ip_tunnel_md_udp_encap(struct sk_buff *skb, struct ip_tunnel_info *info)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph;
+
+ if (iph->protocol != IPPROTO_UDP)
+ return;
+
+ udph = (struct udphdr *)((__u8 *)iph + (iph->ihl << 2));
+ info->encap.sport = udph->source;
+ info->encap.dport = udph->dest;
+}
+EXPORT_SYMBOL(ip_tunnel_md_udp_encap);
+
int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
const struct tnl_ptk_info *tpi, struct metadata_dst *tun_dst,
bool log_ecn_error)
{
- struct pcpu_sw_netstats *tstats;
const struct iphdr *iph = ip_hdr(skb);
- int err;
+ int nh, err;
#ifdef CONFIG_NET_IPGRE_BROADCAST
if (ipv4_is_multicast(iph->daddr)) {
- tunnel->dev->stats.multicast++;
+ DEV_STATS_INC(tunnel->dev, multicast);
skb->pkt_type = PACKET_BROADCAST;
}
#endif
- if ((!(tpi->flags&TUNNEL_CSUM) && (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
- ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
- tunnel->dev->stats.rx_crc_errors++;
- tunnel->dev->stats.rx_errors++;
+ if (test_bit(IP_TUNNEL_CSUM_BIT, tunnel->parms.i_flags) !=
+ test_bit(IP_TUNNEL_CSUM_BIT, tpi->flags)) {
+ DEV_STATS_INC(tunnel->dev, rx_crc_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
- if (tunnel->parms.i_flags&TUNNEL_SEQ) {
- if (!(tpi->flags&TUNNEL_SEQ) ||
+ if (test_bit(IP_TUNNEL_SEQ_BIT, tunnel->parms.i_flags)) {
+ if (!test_bit(IP_TUNNEL_SEQ_BIT, tpi->flags) ||
(tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
- tunnel->dev->stats.rx_fifo_errors++;
- tunnel->dev->stats.rx_errors++;
+ DEV_STATS_INC(tunnel->dev, rx_fifo_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
tunnel->i_seqno = ntohl(tpi->seq) + 1;
}
- skb_reset_network_header(skb);
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
+ skb_set_network_header(skb, (tunnel->dev->type == ARPHRD_ETHER) ? ETH_HLEN : 0);
+
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+ iph = (struct iphdr *)(skb->head + nh);
err = IP_ECN_decapsulate(iph, skb);
if (unlikely(err)) {
@@ -411,18 +426,13 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
&iph->saddr, iph->tos);
if (err > 1) {
- ++tunnel->dev->stats.rx_frame_errors;
- ++tunnel->dev->stats.rx_errors;
+ DEV_STATS_INC(tunnel->dev, rx_frame_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
goto drop;
}
}
- tstats = this_cpu_ptr(tunnel->dev->tstats);
- u64_stats_update_begin(&tstats->syncp);
- tstats->rx_packets++;
- tstats->rx_bytes += skb->len;
- u64_stats_update_end(&tstats->syncp);
-
+ dev_sw_netstats_rx_add(tunnel->dev, skb->len);
skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev)));
if (tunnel->dev->type == ARPHRD_ETHER) {
@@ -501,37 +511,47 @@ EXPORT_SYMBOL_GPL(ip_tunnel_encap_setup);
static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
struct rtable *rt, __be16 df,
- const struct iphdr *inner_iph)
+ const struct iphdr *inner_iph,
+ int tunnel_hlen, __be32 dst, bool md)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- int pkt_size = skb->len - tunnel->hlen - dev->hard_header_len;
+ int pkt_size;
int mtu;
- if (df)
- mtu = dst_mtu(&rt->dst) - dev->hard_header_len
- - sizeof(struct iphdr) - tunnel->hlen;
- else
- mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ tunnel_hlen = md ? tunnel_hlen : tunnel->hlen;
+ pkt_size = skb->len - tunnel_hlen;
+ pkt_size -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
+
+ if (df) {
+ mtu = dst_mtu(&rt->dst) - (sizeof(struct iphdr) + tunnel_hlen);
+ mtu -= dev->type == ARPHRD_ETHER ? dev->hard_header_len : 0;
+ } else {
+ mtu = skb_valid_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
+ }
- skb_dst_update_pmtu(skb, mtu);
+ if (skb_valid_dst(skb))
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
if (skb->protocol == htons(ETH_P_IP)) {
if (!skb_is_gso(skb) &&
(inner_iph->frag_off & htons(IP_DF)) &&
mtu < pkt_size) {
- memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
- icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+ icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
return -E2BIG;
}
}
#if IS_ENABLED(CONFIG_IPV6)
else if (skb->protocol == htons(ETH_P_IPV6)) {
- struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
+ struct rt6_info *rt6;
+ __be32 daddr;
+
+ rt6 = skb_valid_dst(skb) ? dst_rt6_info(skb_dst(skb)) :
+ NULL;
+ daddr = md ? dst : tunnel->parms.iph.daddr;
if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
mtu >= IPV6_MIN_MTU) {
- if ((tunnel->parms.iph.daddr &&
- !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
+ if ((daddr && !ipv4_is_multicast(daddr)) ||
rt6->rt6i_dst.plen == 128) {
rt6->rt6i_flags |= RTF_MODIFIED;
dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
@@ -540,7 +560,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
mtu < pkt_size) {
- icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+ icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
return -E2BIG;
}
}
@@ -548,17 +568,19 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
return 0;
}
-void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
+void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
+ u8 proto, int tunnel_hlen)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
u32 headroom = sizeof(struct iphdr);
struct ip_tunnel_info *tun_info;
const struct ip_tunnel_key *key;
const struct iphdr *inner_iph;
- struct rtable *rt;
+ struct rtable *rt = NULL;
struct flowi4 fl4;
__be16 df = 0;
u8 tos, ttl;
+ bool use_cache;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
@@ -574,20 +596,44 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else if (skb->protocol == htons(ETH_P_IPV6))
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
}
- ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src, 0,
- RT_TOS(tos), tunnel->parms.link, tunnel->fwmark);
- if (tunnel->encap.type != TUNNEL_ENCAP_NONE)
- goto tx_error;
- rt = ip_route_output_key(tunnel->net, &fl4);
- if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
+ ip_tunnel_init_flow(&fl4, proto, key->u.ipv4.dst, key->u.ipv4.src,
+ tunnel_id_to_key32(key->tun_id),
+ tos & INET_DSCP_MASK, tunnel->net, 0, skb->mark,
+ skb_get_hash(skb), key->flow_flags);
+
+ if (!tunnel_hlen)
+ tunnel_hlen = ip_encap_hlen(&tun_info->encap);
+
+ if (ip_tunnel_encap(skb, &tun_info->encap, &proto, &fl4) < 0)
goto tx_error;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+ if (use_cache)
+ rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl4.saddr);
+ if (!rt) {
+ rt = ip_route_output_key(tunnel->net, &fl4);
+ if (IS_ERR(rt)) {
+ DEV_STATS_INC(dev, tx_carrier_errors);
+ goto tx_error;
+ }
+ if (use_cache)
+ dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
+ fl4.saddr);
}
if (rt->dst.dev == dev) {
ip_rt_put(rt);
- dev->stats.collisions++;
+ DEV_STATS_INC(dev, collisions);
goto tx_error;
}
+
+ if (test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags))
+ df = htons(IP_DF);
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, tunnel_hlen,
+ key->u.ipv4.dst, true)) {
+ ip_rt_put(rt);
+ goto tx_error;
+ }
+
tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = key->ttl;
if (ttl == 0) {
@@ -598,26 +644,23 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto)
else
ttl = ip4_dst_hoplimit(&rt->dst);
}
- if (key->tun_flags & TUNNEL_DONT_FRAGMENT)
- df = htons(IP_DF);
- else if (skb->protocol == htons(ETH_P_IP))
- df = inner_iph->frag_off & htons(IP_DF);
- headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
- if (headroom > dev->needed_headroom)
- dev->needed_headroom = headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
+ headroom += LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len;
+ if (skb_cow_head(skb, headroom)) {
ip_rt_put(rt);
goto tx_dropped;
}
+
+ ip_tunnel_adj_headroom(dev, headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl,
- df, !net_eq(tunnel->net, dev_net(dev)));
+ df, !net_eq(tunnel->net, dev_net(dev)), 0);
return;
tx_error:
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
goto kfree;
tx_dropped:
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
kfree:
kfree_skb(skb);
}
@@ -627,17 +670,22 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
const struct iphdr *tnl_params, u8 protocol)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
+ struct ip_tunnel_info *tun_info = NULL;
const struct iphdr *inner_iph;
- struct flowi4 fl4;
- u8 tos, ttl;
- __be16 df;
- struct rtable *rt; /* Route to the other host */
unsigned int max_headroom; /* The extra header space needed */
- __be32 dst;
+ struct rtable *rt = NULL; /* Route to the other host */
+ __be16 payload_protocol;
+ bool use_cache = false;
+ struct flowi4 fl4;
+ bool md = false;
bool connected;
+ u8 tos, ttl;
+ __be32 dst;
+ __be16 df;
inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
connected = (tunnel->parms.iph.daddr != 0);
+ payload_protocol = skb_protocol(skb, true);
memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
@@ -646,16 +694,23 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
/* NBMA tunnel */
if (!skb_dst(skb)) {
- dev->stats.tx_fifo_errors++;
+ DEV_STATS_INC(dev, tx_fifo_errors);
goto tx_error;
}
- if (skb->protocol == htons(ETH_P_IP)) {
+ tun_info = skb_tunnel_info(skb);
+ if (tun_info && (tun_info->mode & IP_TUNNEL_INFO_TX) &&
+ ip_tunnel_info_af(tun_info) == AF_INET &&
+ tun_info->key.u.ipv4.dst) {
+ dst = tun_info->key.u.ipv4.dst;
+ md = true;
+ connected = true;
+ } else if (payload_protocol == htons(ETH_P_IP)) {
rt = skb_rtable(skb);
dst = rt_nexthop(rt, inner_iph->daddr);
}
#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6)) {
+ else if (payload_protocol == htons(ETH_P_IPV6)) {
const struct in6_addr *addr6;
struct neighbour *neigh;
bool do_tx_error_icmp;
@@ -688,50 +743,66 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
else
goto tx_error;
- connected = false;
+ if (!md)
+ connected = false;
}
tos = tnl_params->tos;
if (tos & 0x1) {
tos &= ~0x1;
- if (skb->protocol == htons(ETH_P_IP)) {
+ if (payload_protocol == htons(ETH_P_IP)) {
tos = inner_iph->tos;
connected = false;
- } else if (skb->protocol == htons(ETH_P_IPV6)) {
+ } else if (payload_protocol == htons(ETH_P_IPV6)) {
tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
connected = false;
}
}
ip_tunnel_init_flow(&fl4, protocol, dst, tnl_params->saddr,
- tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link,
- tunnel->fwmark);
+ tunnel->parms.o_key, tos & INET_DSCP_MASK,
+ tunnel->net, READ_ONCE(tunnel->parms.link),
+ tunnel->fwmark, skb_get_hash(skb), 0);
- if (ip_tunnel_encap(skb, tunnel, &protocol, &fl4) < 0)
+ if (ip_tunnel_encap(skb, &tunnel->encap, &protocol, &fl4) < 0)
goto tx_error;
- rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache, &fl4.saddr) :
- NULL;
+ if (connected && md) {
+ use_cache = ip_tunnel_dst_cache_usable(skb, tun_info);
+ if (use_cache)
+ rt = dst_cache_get_ip4(&tun_info->dst_cache,
+ &fl4.saddr);
+ } else {
+ rt = connected ? dst_cache_get_ip4(&tunnel->dst_cache,
+ &fl4.saddr) : NULL;
+ }
if (!rt) {
rt = ip_route_output_key(tunnel->net, &fl4);
if (IS_ERR(rt)) {
- dev->stats.tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
goto tx_error;
}
- if (connected)
+ if (use_cache)
+ dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst,
+ fl4.saddr);
+ else if (!md && connected)
dst_cache_set_ip4(&tunnel->dst_cache, &rt->dst,
fl4.saddr);
}
if (rt->dst.dev == dev) {
ip_rt_put(rt);
- dev->stats.collisions++;
+ DEV_STATS_INC(dev, collisions);
goto tx_error;
}
- if (tnl_update_pmtu(dev, skb, rt, tnl_params->frag_off, inner_iph)) {
+ df = tnl_params->frag_off;
+ if (payload_protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
+ df |= (inner_iph->frag_off & htons(IP_DF));
+
+ if (tnl_update_pmtu(dev, skb, rt, df, inner_iph, 0, 0, false)) {
ip_rt_put(rt);
goto tx_error;
}
@@ -749,34 +820,30 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);
ttl = tnl_params->ttl;
if (ttl == 0) {
- if (skb->protocol == htons(ETH_P_IP))
+ if (payload_protocol == htons(ETH_P_IP))
ttl = inner_iph->ttl;
#if IS_ENABLED(CONFIG_IPV6)
- else if (skb->protocol == htons(ETH_P_IPV6))
+ else if (payload_protocol == htons(ETH_P_IPV6))
ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
#endif
else
ttl = ip4_dst_hoplimit(&rt->dst);
}
- df = tnl_params->frag_off;
- if (skb->protocol == htons(ETH_P_IP) && !tunnel->ignore_df)
- df |= (inner_iph->frag_off&htons(IP_DF));
-
max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)
+ rt->dst.header_len + ip_encap_hlen(&tunnel->encap);
- if (max_headroom > dev->needed_headroom)
- dev->needed_headroom = max_headroom;
- if (skb_cow_head(skb, dev->needed_headroom)) {
+ if (skb_cow_head(skb, max_headroom)) {
ip_rt_put(rt);
- dev->stats.tx_dropped++;
+ DEV_STATS_INC(dev, tx_dropped);
kfree_skb(skb);
return;
}
+ ip_tunnel_adj_headroom(dev, max_headroom);
+
iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl,
- df, !net_eq(tunnel->net, dev_net(dev)));
+ df, !net_eq(tunnel->net, dev_net(dev)), 0);
return;
#if IS_ENABLED(CONFIG_IPV6)
@@ -784,7 +851,7 @@ tx_error_icmp:
dst_link_failure(skb);
#endif
tx_error:
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
@@ -792,7 +859,7 @@ EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
static void ip_tunnel_update(struct ip_tunnel_net *itn,
struct ip_tunnel *t,
struct net_device *dev,
- struct ip_tunnel_parm *p,
+ struct ip_tunnel_parm_kern *p,
bool set_mtu,
__u32 fwmark)
{
@@ -802,7 +869,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
t->parms.i_key = p->i_key;
t->parms.o_key = p->o_key;
if (dev->type != ARPHRD_ETHER) {
- memcpy(dev->dev_addr, &p->iph.saddr, 4);
+ __dev_addr_set(dev, &p->iph.saddr, 4);
memcpy(dev->broadcast, &p->iph.daddr, 4);
}
ip_tunnel_add(itn, t);
@@ -814,17 +881,18 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
if (t->parms.link != p->link || t->fwmark != fwmark) {
int mtu;
- t->parms.link = p->link;
+ WRITE_ONCE(t->parms.link, p->link);
t->fwmark = fwmark;
mtu = ip_tunnel_bind_dev(dev);
if (set_mtu)
- dev->mtu = mtu;
+ WRITE_ONCE(dev->mtu, mtu);
}
dst_cache_reset(&t->dst_cache);
netdev_state_change(dev);
}
-int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm_kern *p,
+ int cmd)
{
int err = 0;
struct ip_tunnel *t = netdev_priv(dev);
@@ -848,10 +916,10 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
goto done;
if (p->iph.ttl)
p->iph.frag_off |= htons(IP_DF);
- if (!(p->i_flags & VTI_ISVTI)) {
- if (!(p->i_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_VTI_BIT, p->i_flags)) {
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->i_flags))
p->i_key = 0;
- if (!(p->o_flags & TUNNEL_KEY))
+ if (!test_bit(IP_TUNNEL_KEY_BIT, p->o_flags))
p->o_key = 0;
}
@@ -924,13 +992,73 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
done:
return err;
}
-EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
+EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
+
+bool ip_tunnel_parm_from_user(struct ip_tunnel_parm_kern *kp,
+ const void __user *data)
+{
+ struct ip_tunnel_parm p;
+
+ if (copy_from_user(&p, data, sizeof(p)))
+ return false;
+
+ strscpy(kp->name, p.name);
+ kp->link = p.link;
+ ip_tunnel_flags_from_be16(kp->i_flags, p.i_flags);
+ ip_tunnel_flags_from_be16(kp->o_flags, p.o_flags);
+ kp->i_key = p.i_key;
+ kp->o_key = p.o_key;
+ memcpy(&kp->iph, &p.iph, min(sizeof(kp->iph), sizeof(p.iph)));
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_from_user);
+
+bool ip_tunnel_parm_to_user(void __user *data, struct ip_tunnel_parm_kern *kp)
+{
+ struct ip_tunnel_parm p;
+
+ if (!ip_tunnel_flags_is_be16_compat(kp->i_flags) ||
+ !ip_tunnel_flags_is_be16_compat(kp->o_flags))
+ return false;
+
+ memset(&p, 0, sizeof(p));
+
+ strscpy(p.name, kp->name);
+ p.link = kp->link;
+ p.i_flags = ip_tunnel_flags_to_be16(kp->i_flags);
+ p.o_flags = ip_tunnel_flags_to_be16(kp->o_flags);
+ p.i_key = kp->i_key;
+ p.o_key = kp->o_key;
+ memcpy(&p.iph, &kp->iph, min(sizeof(p.iph), sizeof(kp->iph)));
+
+ return !copy_to_user(data, &p, sizeof(p));
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_parm_to_user);
+
+int ip_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
+{
+ struct ip_tunnel_parm_kern p;
+ int err;
+
+ if (!ip_tunnel_parm_from_user(&p, data))
+ return -EFAULT;
+ err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
+ if (!err && !ip_tunnel_parm_to_user(data, &p))
+ return -EFAULT;
+ return err;
+}
+EXPORT_SYMBOL_GPL(ip_tunnel_siocdevprivate);
int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
int t_hlen = tunnel->hlen + sizeof(struct iphdr);
- int max_mtu = IP_MAX_MTU - dev->hard_header_len - t_hlen;
+ int max_mtu = IP_MAX_MTU - t_hlen;
+
+ if (dev->type == ARPHRD_ETHER)
+ max_mtu -= dev->hard_header_len;
if (new_mtu < ETH_MIN_MTU)
return -EINVAL;
@@ -942,7 +1070,7 @@ int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
new_mtu = max_mtu;
}
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
EXPORT_SYMBOL_GPL(__ip_tunnel_change_mtu);
@@ -959,7 +1087,6 @@ static void ip_tunnel_dev_free(struct net_device *dev)
gro_cells_destroy(&tunnel->gro_cells);
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
}
void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
@@ -980,15 +1107,15 @@ struct net *ip_tunnel_get_link_net(const struct net_device *dev)
{
struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->net;
+ return READ_ONCE(tunnel->net);
}
EXPORT_SYMBOL(ip_tunnel_get_link_net);
int ip_tunnel_get_iflink(const struct net_device *dev)
{
- struct ip_tunnel *tunnel = netdev_priv(dev);
+ const struct ip_tunnel *tunnel = netdev_priv(dev);
- return tunnel->parms.link;
+ return READ_ONCE(tunnel->parms.link);
}
EXPORT_SYMBOL(ip_tunnel_get_iflink);
@@ -996,7 +1123,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
struct rtnl_link_ops *ops, char *devname)
{
struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
- struct ip_tunnel_parm parms;
+ struct ip_tunnel_parm_kern parms;
unsigned int i;
itn->rtnl_link_ops = ops;
@@ -1014,7 +1141,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
memset(&parms, 0, sizeof(parms));
if (devname)
- strlcpy(parms.name, devname, IFNAMSIZ);
+ strscpy(parms.name, devname, IFNAMSIZ);
rtnl_lock();
itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
@@ -1022,7 +1149,7 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
* Allowing to move it to another netns is clearly unsafe.
*/
if (!IS_ERR(itn->fb_tunnel_dev)) {
- itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL;
+ itn->fb_tunnel_dev->netns_immutable = true;
itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev);
ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev));
itn->type = itn->fb_tunnel_dev->type;
@@ -1033,13 +1160,16 @@ int ip_tunnel_init_net(struct net *net, unsigned int ip_tnl_net_id,
}
EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
-static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
- struct list_head *head,
- struct rtnl_link_ops *ops)
+void ip_tunnel_delete_net(struct net *net, unsigned int id,
+ struct rtnl_link_ops *ops,
+ struct list_head *head)
{
+ struct ip_tunnel_net *itn = net_generic(net, id);
struct net_device *dev, *aux;
int h;
+ ASSERT_RTNL_NET(net);
+
for_each_netdev_safe(net, dev, aux)
if (dev->rtnl_link_ops == ops)
unregister_netdevice_queue(dev, head);
@@ -1057,29 +1187,13 @@ static void ip_tunnel_destroy(struct net *net, struct ip_tunnel_net *itn,
unregister_netdevice_queue(t->dev, head);
}
}
+EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
-void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id,
- struct rtnl_link_ops *ops)
-{
- struct ip_tunnel_net *itn;
- struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list) {
- itn = net_generic(net, id);
- ip_tunnel_destroy(net, itn, &list, ops);
- }
- unregister_netdevice_many(&list);
- rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets);
-
-int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+int ip_tunnel_newlink(struct net *net, struct net_device *dev,
+ struct nlattr *tb[], struct ip_tunnel_parm_kern *p,
+ __u32 fwmark)
{
struct ip_tunnel *nt;
- struct net *net = dev_net(dev);
struct ip_tunnel_net *itn;
int mtu;
int err;
@@ -1107,10 +1221,12 @@ int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
mtu = ip_tunnel_bind_dev(dev);
if (tb[IFLA_MTU]) {
- unsigned int max = IP_MAX_MTU - dev->hard_header_len - nt->hlen;
+ unsigned int max = IP_MAX_MTU - (nt->hlen + sizeof(struct iphdr));
- mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU,
- (unsigned int)(max - sizeof(struct iphdr)));
+ if (dev->type == ARPHRD_ETHER)
+ max -= dev->hard_header_len;
+
+ mtu = clamp(dev->mtu, (unsigned int)ETH_MIN_MTU, max);
}
err = dev_set_mtu(dev, mtu);
@@ -1128,7 +1244,7 @@ err_register_netdevice:
EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
- struct ip_tunnel_parm *p, __u32 fwmark)
+ struct ip_tunnel_parm_kern *p, __u32 fwmark)
{
struct ip_tunnel *t;
struct ip_tunnel *tunnel = netdev_priv(dev);
@@ -1173,33 +1289,26 @@ int ip_tunnel_init(struct net_device *dev)
dev->needs_free_netdev = true;
dev->priv_destructor = ip_tunnel_dev_free;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_TSTATS;
err = dst_cache_init(&tunnel->dst_cache, GFP_KERNEL);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
err = gro_cells_init(&tunnel->gro_cells, dev);
if (err) {
dst_cache_destroy(&tunnel->dst_cache);
- free_percpu(dev->tstats);
return err;
}
tunnel->dev = dev;
- tunnel->net = dev_net(dev);
- strcpy(tunnel->parms.name, dev->name);
+ strscpy(tunnel->parms.name, dev->name);
iph->version = 4;
iph->ihl = 5;
- if (tunnel->collect_md) {
- dev->features |= NETIF_F_NETNS_LOCAL;
+ if (tunnel->collect_md)
netif_keep_dst(dev);
- }
+ netdev_lockdep_set_classes(dev);
return 0;
}
EXPORT_SYMBOL_GPL(ip_tunnel_init);
@@ -1211,9 +1320,9 @@ void ip_tunnel_uninit(struct net_device *dev)
struct ip_tunnel_net *itn;
itn = net_generic(net, tunnel->ip_tnl_net_id);
- /* fb_tunnel_dev will be unregisted in net-exit call. */
- if (itn->fb_tunnel_dev != dev)
- ip_tunnel_del(itn, netdev_priv(dev));
+ ip_tunnel_del(itn, netdev_priv(dev));
+ if (itn->fb_tunnel_dev == dev)
+ WRITE_ONCE(itn->fb_tunnel_dev, NULL);
dst_cache_reset(&tunnel->dst_cache);
}
@@ -1227,4 +1336,5 @@ void ip_tunnel_setup(struct net_device *dev, unsigned int net_id)
}
EXPORT_SYMBOL_GPL(ip_tunnel_setup);
+MODULE_DESCRIPTION("IPv4 tunnel implementation library");
MODULE_LICENSE("GPL");