summaryrefslogtreecommitdiff
path: root/drivers/net/geneve.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/geneve.c')
-rw-r--r--drivers/net/geneve.c844
1 files changed, 478 insertions, 366 deletions
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 58bbba8582b0..77b0c3d52041 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -1,29 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* GENEVE: Generic Network Virtualization Encapsulation
*
* Copyright (c) 2015 Red Hat, Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/ethtool.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/etherdevice.h>
#include <linux/hash.h>
+#include <net/ipv6_stubs.h>
#include <net/dst_metadata.h>
#include <net/gro_cells.h>
#include <net/rtnetlink.h>
#include <net/geneve.h>
+#include <net/gro.h>
+#include <net/netdev_lock.h>
#include <net/protocol.h>
#define GENEVE_NETDEV_VER "0.6"
-#define GENEVE_UDP_PORT 6081
-
#define GENEVE_N_VID (1u << 24)
#define GENEVE_VID_MASK (GENEVE_N_VID - 1)
@@ -42,6 +41,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* per-network namespace private data for this module */
struct geneve_net {
struct list_head geneve_list;
+ /* sock_list is protected by rtnl lock */
struct list_head sock_list;
};
@@ -52,6 +52,19 @@ struct geneve_dev_node {
struct geneve_dev *geneve;
};
+struct geneve_config {
+ bool collect_md;
+ bool use_udp6_rx_checksums;
+ bool ttl_inherit;
+ enum ifla_geneve_df df;
+ bool inner_proto_inherit;
+ u16 port_min;
+ u16 port_max;
+
+ /* Must be last --ends in a flexible-array member. */
+ struct ip_tunnel_info info;
+};
+
/* Pseudo network device */
struct geneve_dev {
struct geneve_dev_node hlist4; /* vni hash table for IPv4 socket */
@@ -60,17 +73,13 @@ struct geneve_dev {
#endif
struct net *net; /* netns for packet i/o */
struct net_device *dev; /* netdev for geneve tunnel */
- struct ip_tunnel_info info;
struct geneve_sock __rcu *sock4; /* IPv4 socket used for geneve tunnel */
#if IS_ENABLED(CONFIG_IPV6)
struct geneve_sock __rcu *sock6; /* IPv6 socket used for geneve tunnel */
#endif
struct list_head next; /* geneve's per namespace list */
struct gro_cells gro_cells;
- bool collect_md;
- bool use_udp6_rx_checksums;
- bool ttl_inherit;
- enum ifla_geneve_df df;
+ struct geneve_config cfg;
};
struct geneve_sock {
@@ -136,8 +145,8 @@ static struct geneve_dev *geneve_lookup(struct geneve_sock *gs,
hash = geneve_net_vni_hash(vni);
vni_list_head = &gs->vni_list[hash];
hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
- if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
- addr == node->geneve->info.key.u.ipv4.dst)
+ if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
+ addr == node->geneve->cfg.info.key.u.ipv4.dst)
return node->geneve;
}
return NULL;
@@ -155,8 +164,8 @@ static struct geneve_dev *geneve6_lookup(struct geneve_sock *gs,
hash = geneve_net_vni_hash(vni);
vni_list_head = &gs->vni_list[hash];
hlist_for_each_entry_rcu(node, vni_list_head, hlist) {
- if (eq_tun_id_and_vni((u8 *)&node->geneve->info.key.tun_id, vni) &&
- ipv6_addr_equal(&addr6, &node->geneve->info.key.u.ipv6.dst))
+ if (eq_tun_id_and_vni((u8 *)&node->geneve->cfg.info.key.tun_id, vni) &&
+ ipv6_addr_equal(&addr6, &node->geneve->cfg.info.key.u.ipv6.dst))
return node->geneve;
}
return NULL;
@@ -217,56 +226,78 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
{
struct genevehdr *gnvh = geneve_hdr(skb);
struct metadata_dst *tun_dst = NULL;
- struct pcpu_sw_netstats *stats;
unsigned int len;
- int err = 0;
+ int nh, err = 0;
void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) {
- __be16 flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
- flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT |
- (gnvh->oam ? TUNNEL_OAM : 0) |
- (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam);
+ __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);
tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
vni_to_tunnel_id(gnvh->vni),
gnvh->opt_len * 4);
if (!tun_dst) {
- geneve->dev->stats.rx_dropped++;
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
/* Update tunnel dst according to Geneve options. */
+ ip_tunnel_flags_zero(flags);
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags);
ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
gnvh->options, gnvh->opt_len * 4,
- TUNNEL_GENEVE_OPT);
+ flags);
} else {
/* Drop packets w/ critical options,
* since we don't support any...
*/
if (gnvh->critical) {
- geneve->dev->stats.rx_frame_errors++;
- geneve->dev->stats.rx_errors++;
+ DEV_STATS_INC(geneve->dev, rx_frame_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
goto drop;
}
}
- skb_reset_mac_header(skb);
- skb->protocol = eth_type_trans(skb, geneve->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
if (tun_dst)
skb_dst_set(skb, &tun_dst->dst);
- /* Ignore packet loops (and multicast echo) */
- if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
- geneve->dev->stats.rx_errors++;
- goto drop;
+ if (gnvh->proto_type == htons(ETH_P_TEB)) {
+ skb_reset_mac_header(skb);
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+ /* Ignore packet loops (and multicast echo) */
+ if (ether_addr_equal(eth_hdr(skb)->h_source,
+ geneve->dev->dev_addr)) {
+ DEV_STATS_INC(geneve->dev, rx_errors);
+ goto drop;
+ }
+ } else {
+ skb_reset_mac_header(skb);
+ skb->dev = geneve->dev;
+ skb->pkt_type = PACKET_HOST;
}
- oiph = skb_network_header(skb);
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(geneve->dev, rx_length_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ oiph = skb->head + nh;
+
if (geneve_get_sk_family(gs) == AF_INET)
err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -288,21 +319,17 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
#endif
}
if (err > 1) {
- ++geneve->dev->stats.rx_frame_errors;
- ++geneve->dev->stats.rx_errors;
+ DEV_STATS_INC(geneve->dev, rx_frame_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
goto drop;
}
}
len = skb->len;
err = gro_cells_receive(&geneve->gro_cells, skb);
- if (likely(err == NET_RX_SUCCESS)) {
- stats = this_cpu_ptr(geneve->dev->tstats);
- u64_stats_update_begin(&stats->syncp);
- stats->rx_packets++;
- stats->rx_bytes += len;
- u64_stats_update_end(&stats->syncp);
- }
+ if (likely(err == NET_RX_SUCCESS))
+ dev_dstats_rx_add(geneve->dev, len);
+
return;
drop:
/* Consume bad packet */
@@ -315,22 +342,16 @@ static int geneve_init(struct net_device *dev)
struct geneve_dev *geneve = netdev_priv(dev);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&geneve->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
- err = dst_cache_init(&geneve->info.dst_cache, GFP_KERNEL);
+ err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
if (err) {
- free_percpu(dev->tstats);
gro_cells_destroy(&geneve->gro_cells);
return err;
}
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -338,9 +359,8 @@ static void geneve_uninit(struct net_device *dev)
{
struct geneve_dev *geneve = netdev_priv(dev);
- dst_cache_destroy(&geneve->info.dst_cache);
+ dst_cache_destroy(&geneve->cfg.info.dst_cache);
gro_cells_destroy(&geneve->gro_cells);
- free_percpu(dev->tstats);
}
/* Callback from net/ipv4/udp.c to receive packets */
@@ -349,6 +369,7 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct genevehdr *geneveh;
struct geneve_dev *geneve;
struct geneve_sock *gs;
+ __be16 inner_proto;
int opts_len;
/* Need UDP and Geneve header to be present */
@@ -360,9 +381,6 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(geneveh->ver != GENEVE_VER))
goto drop;
- if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
- goto drop;
-
gs = rcu_dereference_sk_user_data(sk);
if (!gs)
goto drop;
@@ -371,11 +389,18 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!geneve)
goto drop;
+ inner_proto = geneveh->proto_type;
+
+ if (unlikely((!geneve->cfg.inner_proto_inherit &&
+ inner_proto != htons(ETH_P_TEB)))) {
+ dev_dstats_rx_dropped(geneve->dev);
+ goto drop;
+ }
+
opts_len = geneveh->opt_len * 4;
- if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
- htons(ETH_P_TEB),
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
!net_eq(geneve->net, dev_net(geneve->dev)))) {
- geneve->dev->stats.rx_dropped++;
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
@@ -396,7 +421,7 @@ static int geneve_udp_encap_err_lookup(struct sock *sk, struct sk_buff *skb)
u8 zero_vni[3] = { 0 };
u8 *vni = zero_vni;
- if (skb->len < GENEVE_BASE_HLEN)
+ if (!pskb_may_pull(skb, skb_transport_offset(skb) + GENEVE_BASE_HLEN))
return -EINVAL;
geneveh = geneve_hdr(skb);
@@ -466,6 +491,7 @@ static struct socket *geneve_create_sock(struct net *net, bool ipv6,
if (err < 0)
return ERR_PTR(err);
+ udp_allow_gso(sock->sk);
return sock;
}
@@ -488,19 +514,16 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
off_gnv = skb_gro_offset(skb);
hlen = off_gnv + sizeof(*gh);
- gh = skb_gro_header_fast(skb, off_gnv);
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
+ gh = skb_gro_header(skb, hlen, off_gnv);
+ if (unlikely(!gh))
+ goto out;
if (gh->ver != GENEVE_VER || gh->oam)
goto out;
gh_len = geneve_hlen(gh);
hlen = off_gnv + gh_len;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
gh = skb_gro_header_slow(skb, hlen, off_gnv);
if (unlikely(!gh))
goto out;
@@ -518,20 +541,19 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
}
}
+ skb_gro_pull(skb, gh_len);
+ skb_gro_postpull_rcsum(skb, gh, gh_len);
type = gh->proto_type;
+ if (likely(type == htons(ETH_P_TEB)))
+ return call_gro_receive(eth_gro_receive, head, skb);
- rcu_read_lock();
ptype = gro_find_receive_by_type(type);
if (!ptype)
- goto out_unlock;
+ goto out;
- skb_gro_pull(skb, gh_len);
- skb_gro_postpull_rcsum(skb, gh, gh_len);
pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
flush = 0;
-out_unlock:
- rcu_read_unlock();
out:
skb_gro_flush_final(skb, pp, flush);
@@ -551,13 +573,14 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
gh_len = geneve_hlen(gh);
type = gh->proto_type;
- rcu_read_lock();
+ /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
+ if (likely(type == htons(ETH_P_TEB)))
+ return eth_gro_complete(skb, nhoff + gh_len);
+
ptype = gro_find_complete_by_type(type);
if (ptype)
err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
- rcu_read_unlock();
-
skb_set_inner_mac_header(skb, nhoff + gh_len);
return err;
@@ -658,19 +681,19 @@ static int geneve_sock_add(struct geneve_dev *geneve, bool ipv6)
__u8 vni[3];
__u32 hash;
- gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->info.key.tp_dst);
+ gs = geneve_find_sock(gn, ipv6 ? AF_INET6 : AF_INET, geneve->cfg.info.key.tp_dst);
if (gs) {
gs->refcnt++;
goto out;
}
- gs = geneve_socket_create(net, geneve->info.key.tp_dst, ipv6,
- geneve->use_udp6_rx_checksums);
+ gs = geneve_socket_create(net, geneve->cfg.info.key.tp_dst, ipv6,
+ geneve->cfg.use_udp6_rx_checksums);
if (IS_ERR(gs))
return PTR_ERR(gs);
out:
- gs->collect_md = geneve->collect_md;
+ gs->collect_md = geneve->cfg.collect_md;
#if IS_ENABLED(CONFIG_IPV6)
if (ipv6) {
rcu_assign_pointer(geneve->sock6, gs);
@@ -683,7 +706,7 @@ out:
}
node->geneve = geneve;
- tunnel_id_to_vni(geneve->info.key.tun_id, vni);
+ tunnel_id_to_vni(geneve->cfg.info.key.tun_id, vni);
hash = geneve_net_vni_hash(vni);
hlist_add_head_rcu(&node->hlist, &gs->vni_list[hash]);
return 0;
@@ -692,15 +715,20 @@ out:
static int geneve_open(struct net_device *dev)
{
struct geneve_dev *geneve = netdev_priv(dev);
- bool ipv6 = !!(geneve->info.mode & IP_TUNNEL_INFO_IPV6);
- bool metadata = geneve->collect_md;
+ bool metadata = geneve->cfg.collect_md;
+ bool ipv4, ipv6;
int ret = 0;
+ ipv6 = geneve->cfg.info.mode & IP_TUNNEL_INFO_IPV6 || metadata;
+ ipv4 = !ipv6 || metadata;
#if IS_ENABLED(CONFIG_IPV6)
- if (ipv6 || metadata)
+ if (ipv6) {
ret = geneve_sock_add(geneve, true);
+ if (ret < 0 && ret != -EAFNOSUPPORT)
+ ipv4 = false;
+ }
#endif
- if (!ret && (!ipv6 || metadata))
+ if (ipv4)
ret = geneve_sock_add(geneve, false);
if (ret < 0)
geneve_sock_release(geneve);
@@ -721,27 +749,31 @@ static int geneve_stop(struct net_device *dev)
}
static void geneve_build_header(struct genevehdr *geneveh,
- const struct ip_tunnel_info *info)
+ const struct ip_tunnel_info *info,
+ __be16 inner_proto)
{
geneveh->ver = GENEVE_VER;
geneveh->opt_len = info->options_len / 4;
- geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
- geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
+ geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags);
+ geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT,
+ info->key.tun_flags);
geneveh->rsvd1 = 0;
tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
- geneveh->proto_type = htons(ETH_P_TEB);
+ geneveh->proto_type = inner_proto;
geneveh->rsvd2 = 0;
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags))
ip_tunnel_info_opts_get(geneveh->options, info);
}
static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
const struct ip_tunnel_info *info,
- bool xnet, int ip_hdr_len)
+ bool xnet, int ip_hdr_len,
+ bool inner_proto_inherit)
{
- bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
+ bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
struct genevehdr *gnvh;
+ __be16 inner_proto;
int min_headroom;
int err;
@@ -759,8 +791,9 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
goto free_dst;
gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
- geneve_build_header(gnvh, info);
- skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+ inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
+ geneve_build_header(gnvh, info, inner_proto);
+ skb_set_inner_protocol(skb, inner_proto);
return 0;
free_dst:
@@ -768,145 +801,109 @@ free_dst:
return err;
}
-static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
- struct net_device *dev,
- struct geneve_sock *gs4,
- struct flowi4 *fl4,
- const struct ip_tunnel_info *info)
-{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_cache *dst_cache;
- struct rtable *rt = NULL;
- __u8 tos;
-
- if (!gs4)
- return ERR_PTR(-EIO);
-
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_mark = skb->mark;
- fl4->flowi4_proto = IPPROTO_UDP;
- fl4->daddr = info->key.u.ipv4.dst;
- fl4->saddr = info->key.u.ipv4.src;
-
- tos = info->key.tos;
- if ((tos == 1) && !geneve->collect_md) {
- tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
- use_cache = false;
- }
- fl4->flowi4_tos = RT_TOS(tos);
-
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
- if (rt)
- return rt;
- }
- rt = ip_route_output_key(geneve->net, fl4);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (rt->dst.dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
- ip_rt_put(rt);
- return ERR_PTR(-ELOOP);
- }
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
- return rt;
-}
-
-#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
- struct net_device *dev,
- struct geneve_sock *gs6,
- struct flowi6 *fl6,
- const struct ip_tunnel_info *info)
+static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev,
+ const struct ip_tunnel_info *info,
+ bool *use_cache)
{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_entry *dst = NULL;
- struct dst_cache *dst_cache;
- __u8 prio;
-
- if (!gs6)
- return ERR_PTR(-EIO);
-
- memset(fl6, 0, sizeof(*fl6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_proto = IPPROTO_UDP;
- fl6->daddr = info->key.u.ipv6.dst;
- fl6->saddr = info->key.u.ipv6.src;
- prio = info->key.tos;
- if ((prio == 1) && !geneve->collect_md) {
- prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
- use_cache = false;
- }
+ u8 dsfield;
- fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
- info->key.label);
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
- if (dst)
- return dst;
- }
- if (ipv6_stub->ipv6_dst_lookup(geneve->net, gs6->sock->sk, &dst, fl6)) {
- netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (dst->dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
- dst_release(dst);
- return ERR_PTR(-ELOOP);
+ dsfield = info->key.tos;
+ if (dsfield == 1 && !geneve->cfg.collect_md) {
+ dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+ *use_cache = false;
}
- if (use_cache)
- dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
- return dst;
+ return dsfield;
}
-#endif
static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_dev *geneve,
const struct ip_tunnel_info *info)
{
+ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
const struct ip_tunnel_key *key = &info->key;
struct rtable *rt;
- struct flowi4 fl4;
+ bool use_cache;
__u8 tos, ttl;
__be16 df = 0;
+ __be32 saddr;
__be16 sport;
int err;
- rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
+ if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
+ return -EINVAL;
+
+ if (!gs4)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ tos = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
+ &info->key,
+ sport, geneve->cfg.info.key.tp_dst, tos,
+ use_cache ?
+ (struct dst_cache *)&info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
- skb_tunnel_check_pmtu(skb, &rt->dst,
- GENEVE_IPV4_HLEN + info->options_len);
+ err = skb_tunnel_check_pmtu(skb, &rt->dst,
+ GENEVE_IPV4_HLEN + info->options_len,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ dst_release(&rt->dst);
+ return err;
+ } else if (err) {
+ struct ip_tunnel_info *info;
+
+ info = skb_tunnel_info(skb);
+ if (info) {
+ struct ip_tunnel_info *unclone;
+
+ unclone = skb_tunnel_info_unclone(skb);
+ if (unlikely(!unclone)) {
+ dst_release(&rt->dst);
+ return -ENOMEM;
+ }
+
+ unclone->key.u.ipv4.dst = saddr;
+ unclone->key.u.ipv4.src = info->key.u.ipv4.dst;
+ }
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dst_release(&rt->dst);
+ return -EINVAL;
+ }
+
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ __netif_rx(skb);
+ dst_release(&rt->dst);
+ return -EMSGSIZE;
+ }
- sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
- if (geneve->collect_md) {
- tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
+ tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb);
+ if (geneve->cfg.collect_md) {
ttl = key->ttl;
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+ df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ?
+ htons(IP_DF) : 0;
} else {
- tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
- if (geneve->ttl_inherit)
+ if (geneve->cfg.ttl_inherit)
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
else
ttl = key->ttl;
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- if (geneve->df == GENEVE_DF_SET) {
+ if (geneve->cfg.df == GENEVE_DF_SET) {
df = htons(IP_DF);
- } else if (geneve->df == GENEVE_DF_INHERIT) {
- struct ethhdr *eth = eth_hdr(skb);
+ } else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
+ struct ethhdr *eth = skb_eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_IPV6) {
df = htons(IP_DF);
@@ -919,14 +916,16 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
}
}
- err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
+ err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr),
+ inner_proto_inherit);
if (unlikely(err))
return err;
- udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
- tos, ttl, df, sport, geneve->info.key.tp_dst,
+ udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
+ tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
!net_eq(geneve->net, dev_net(geneve->dev)),
- !(info->key.tun_flags & TUNNEL_CSUM));
+ !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags),
+ 0);
return 0;
}
@@ -935,42 +934,91 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_dev *geneve,
const struct ip_tunnel_info *info)
{
+ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
const struct ip_tunnel_key *key = &info->key;
struct dst_entry *dst = NULL;
- struct flowi6 fl6;
+ struct in6_addr saddr;
+ bool use_cache;
__u8 prio, ttl;
__be16 sport;
int err;
- dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
+ if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
+ return -EINVAL;
+
+ if (!gs6)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ prio = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
+ &saddr, key, sport,
+ geneve->cfg.info.key.tp_dst, prio,
+ use_cache ?
+ (struct dst_cache *)&info->dst_cache : NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
- skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
+ err = skb_tunnel_check_pmtu(skb, dst,
+ GENEVE_IPV6_HLEN + info->options_len,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ dst_release(dst);
+ return err;
+ } else if (err) {
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (info) {
+ struct ip_tunnel_info *unclone;
- sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
- if (geneve->collect_md) {
- prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
+ unclone = skb_tunnel_info_unclone(skb);
+ if (unlikely(!unclone)) {
+ dst_release(dst);
+ return -ENOMEM;
+ }
+
+ unclone->key.u.ipv6.dst = saddr;
+ unclone->key.u.ipv6.src = info->key.u.ipv6.dst;
+ }
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dst_release(dst);
+ return -EINVAL;
+ }
+
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ __netif_rx(skb);
+ dst_release(dst);
+ return -EMSGSIZE;
+ }
+
+ prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb);
+ if (geneve->cfg.collect_md) {
ttl = key->ttl;
} else {
- prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
- ip_hdr(skb), skb);
- if (geneve->ttl_inherit)
+ if (geneve->cfg.ttl_inherit)
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
else
ttl = key->ttl;
ttl = ttl ? : ip6_dst_hoplimit(dst);
}
- err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
+ err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr),
+ inner_proto_inherit);
if (unlikely(err))
return err;
udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
- &fl6.saddr, &fl6.daddr, prio, ttl,
- info->key.label, sport, geneve->info.key.tp_dst,
- !(info->key.tun_flags & TUNNEL_CSUM));
+ &saddr, &key->u.ipv6.dst, prio, ttl,
+ info->key.label, sport, geneve->cfg.info.key.tp_dst,
+ !test_bit(IP_TUNNEL_CSUM_BIT,
+ info->key.tun_flags),
+ 0);
return 0;
}
#endif
@@ -981,15 +1029,16 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
struct ip_tunnel_info *info = NULL;
int err;
- if (geneve->collect_md) {
+ if (geneve->cfg.collect_md) {
info = skb_tunnel_info(skb);
if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
- err = -EINVAL;
netdev_dbg(dev, "no tunnel metadata\n");
- goto tx_error;
+ dev_kfree_skb(skb);
+ dev_dstats_tx_dropped(dev);
+ return NETDEV_TX_OK;
}
} else {
- info = &geneve->info;
+ info = &geneve->cfg.info;
}
rcu_read_lock();
@@ -1003,15 +1052,16 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
if (likely(!err))
return NETDEV_TX_OK;
-tx_error:
- dev_kfree_skb(skb);
+
+ if (err != -EMSGSIZE)
+ dev_kfree_skb(skb);
if (err == -ELOOP)
- dev->stats.collisions++;
+ DEV_STATS_INC(dev, collisions);
else if (err == -ENETUNREACH)
- dev->stats.tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
return NETDEV_TX_OK;
}
@@ -1022,7 +1072,7 @@ static int geneve_change_mtu(struct net_device *dev, int new_mtu)
else if (new_mtu < dev->min_mtu)
new_mtu = dev->min_mtu;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -1030,38 +1080,67 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct ip_tunnel_info *info = skb_tunnel_info(skb);
struct geneve_dev *geneve = netdev_priv(dev);
+ __be16 sport;
if (ip_tunnel_info_af(info) == AF_INET) {
struct rtable *rt;
- struct flowi4 fl4;
struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
-
- rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info);
+ bool use_cache;
+ __be32 saddr;
+ u8 tos;
+
+ if (!gs4)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ tos = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
+ &info->key,
+ sport, geneve->cfg.info.key.tp_dst,
+ tos,
+ use_cache ? &info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
- info->key.u.ipv4.src = fl4.saddr;
+ info->key.u.ipv4.src = saddr;
#if IS_ENABLED(CONFIG_IPV6)
} else if (ip_tunnel_info_af(info) == AF_INET6) {
struct dst_entry *dst;
- struct flowi6 fl6;
struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
-
- dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info);
+ struct in6_addr saddr;
+ bool use_cache;
+ u8 prio;
+
+ if (!gs6)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ prio = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
+ &saddr, &info->key, sport,
+ geneve->cfg.info.key.tp_dst, prio,
+ use_cache ? &info->dst_cache : NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
dst_release(dst);
- info->key.u.ipv6.src = fl6.saddr;
+ info->key.u.ipv6.src = saddr;
#endif
} else {
return -EINVAL;
}
- info->key.tp_src = udp_flow_src_port(geneve->net, skb,
- 1, USHRT_MAX, true);
- info->key.tp_dst = geneve->info.key.tp_dst;
+ info->key.tp_src = sport;
+ info->key.tp_dst = geneve->cfg.info.key.tp_dst;
return 0;
}
@@ -1071,7 +1150,6 @@ static const struct net_device_ops geneve_netdev_ops = {
.ndo_open = geneve_open,
.ndo_stop = geneve_stop,
.ndo_start_xmit = geneve_xmit,
- .ndo_get_stats64 = ip_tunnel_get_stats64,
.ndo_change_mtu = geneve_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
@@ -1081,8 +1159,8 @@ static const struct net_device_ops geneve_netdev_ops = {
static void geneve_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
- strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
- strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
+ strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
+ strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
}
static const struct ethtool_ops geneve_ethtool_ops = {
@@ -1091,7 +1169,7 @@ static const struct ethtool_ops geneve_ethtool_ops = {
};
/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type geneve_type = {
+static const struct device_type geneve_type = {
.name = "geneve",
};
@@ -1105,8 +1183,9 @@ static void geneve_offload_rx_ports(struct net_device *dev, bool push)
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
- rcu_read_lock();
- list_for_each_entry_rcu(gs, &gn->sock_list, list) {
+ ASSERT_RTNL();
+
+ list_for_each_entry(gs, &gn->sock_list, list) {
if (push) {
udp_tunnel_push_rx_port(dev, gs->sock,
UDP_TUNNEL_TYPE_GENEVE);
@@ -1115,7 +1194,6 @@ static void geneve_offload_rx_ports(struct net_device *dev, bool push)
UDP_TUNNEL_TYPE_GENEVE);
}
}
- rcu_read_unlock();
}
/* Initialize the device structure. */
@@ -1129,14 +1207,15 @@ static void geneve_setup(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &geneve_type);
- dev->features |= NETIF_F_LLTX;
- dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM;
+ dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
dev->features |= NETIF_F_RXCSUM;
dev->features |= NETIF_F_GSO_SOFTWARE;
- dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_RXCSUM;
+ dev->hw_features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
+ dev->hw_features |= NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
/* MTU range: 68 - (something less than 65535) */
dev->min_mtu = ETH_MIN_MTU;
/* The max_mtu calculation does not take account of GENEVE
@@ -1148,12 +1227,14 @@ static void geneve_setup(struct net_device *dev)
netif_keep_dst(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
+ dev->lltx = true;
eth_hw_addr_random(dev);
}
static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
+ [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
[IFLA_GENEVE_ID] = { .type = NLA_U32 },
- [IFLA_GENEVE_REMOTE] = { .len = FIELD_SIZEOF(struct iphdr, daddr) },
+ [IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
[IFLA_GENEVE_TTL] = { .type = NLA_U8 },
[IFLA_GENEVE_TOS] = { .type = NLA_U8 },
@@ -1165,6 +1246,8 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
[IFLA_GENEVE_DF] = { .type = NLA_U8 },
+ [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG },
+ [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)),
};
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1204,12 +1287,23 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
enum ifla_geneve_df df = nla_get_u8(data[IFLA_GENEVE_DF]);
if (df < 0 || df > GENEVE_DF_MAX) {
- NL_SET_ERR_MSG_ATTR(extack, tb[IFLA_GENEVE_DF],
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_DF],
"Invalid DF attribute");
return -EINVAL;
}
}
+ if (data[IFLA_GENEVE_PORT_RANGE]) {
+ const struct ifla_geneve_port_range *p;
+
+ p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
+ if (ntohs(p->high) < ntohs(p->low)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE],
+ "Invalid source port range");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1223,13 +1317,13 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
*tun_on_same_port = false;
*tun_collect_md = false;
list_for_each_entry(geneve, &gn->geneve_list, next) {
- if (info->key.tp_dst == geneve->info.key.tp_dst) {
- *tun_collect_md = geneve->collect_md;
+ if (info->key.tp_dst == geneve->cfg.info.key.tp_dst) {
+ *tun_collect_md = geneve->cfg.collect_md;
*tun_on_same_port = true;
}
- if (info->key.tun_id == geneve->info.key.tun_id &&
- info->key.tp_dst == geneve->info.key.tp_dst &&
- !memcmp(&info->key.u, &geneve->info.key.u, sizeof(info->key.u)))
+ if (info->key.tun_id == geneve->cfg.info.key.tun_id &&
+ info->key.tp_dst == geneve->cfg.info.key.tp_dst &&
+ !memcmp(&info->key.u, &geneve->cfg.info.key.u, sizeof(info->key.u)))
t = geneve;
}
return t;
@@ -1237,7 +1331,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
{
- return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
+ return !(info->key.tun_id || info->key.tos ||
+ !ip_tunnel_flags_empty(info->key.tun_flags) ||
info->key.ttl || info->key.label || info->key.tp_src ||
memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
}
@@ -1253,16 +1348,15 @@ static bool geneve_dst_addr_equal(struct ip_tunnel_info *a,
static int geneve_configure(struct net *net, struct net_device *dev,
struct netlink_ext_ack *extack,
- const struct ip_tunnel_info *info,
- bool metadata, bool ipv6_rx_csum,
- bool ttl_inherit, enum ifla_geneve_df df)
+ const struct geneve_config *cfg)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *t, *geneve = netdev_priv(dev);
+ const struct ip_tunnel_info *info = &cfg->info;
bool tun_collect_md, tun_on_same_port;
int err, encap_len;
- if (metadata && !is_tnl_info_zero(info)) {
+ if (cfg->collect_md && !is_tnl_info_zero(info)) {
NL_SET_ERR_MSG(extack,
"Device is externally controlled, so attributes (VNI, Port, and so on) must not be specified");
return -EINVAL;
@@ -1277,7 +1371,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
/* make enough headroom for basic scenario */
encap_len = GENEVE_BASE_HLEN + ETH_HLEN;
- if (!metadata && ip_tunnel_info_af(info) == AF_INET) {
+ if (!cfg->collect_md && ip_tunnel_info_af(info) == AF_INET) {
encap_len += sizeof(struct iphdr);
dev->max_mtu -= sizeof(struct iphdr);
} else {
@@ -1286,7 +1380,7 @@ static int geneve_configure(struct net *net, struct net_device *dev,
}
dev->needed_headroom = encap_len + ETH_HLEN;
- if (metadata) {
+ if (cfg->collect_md) {
if (tun_on_same_port) {
NL_SET_ERR_MSG(extack,
"There can be only one externally controlled device on a destination port");
@@ -1300,12 +1394,16 @@ static int geneve_configure(struct net *net, struct net_device *dev,
}
}
- dst_cache_reset(&geneve->info.dst_cache);
- geneve->info = *info;
- geneve->collect_md = metadata;
- geneve->use_udp6_rx_checksums = ipv6_rx_csum;
- geneve->ttl_inherit = ttl_inherit;
- geneve->df = df;
+ dst_cache_reset(&geneve->cfg.info.dst_cache);
+ memcpy(&geneve->cfg, cfg, sizeof(*cfg));
+
+ if (geneve->cfg.inner_proto_inherit) {
+ dev->header_ops = NULL;
+ dev->type = ARPHRD_NONE;
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ }
err = register_netdevice(dev);
if (err)
@@ -1323,10 +1421,9 @@ static void init_tnl_info(struct ip_tunnel_info *info, __u16 dst_port)
static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
struct netlink_ext_ack *extack,
- struct ip_tunnel_info *info, bool *metadata,
- bool *use_udp6_rx_checksums, bool *ttl_inherit,
- enum ifla_geneve_df *df, bool changelink)
+ struct geneve_config *cfg, bool changelink)
{
+ struct ip_tunnel_info *info = &cfg->info;
int attrtype;
if (data[IFLA_GENEVE_REMOTE] && data[IFLA_GENEVE_REMOTE6]) {
@@ -1344,7 +1441,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
info->key.u.ipv4.dst =
nla_get_in_addr(data[IFLA_GENEVE_REMOTE]);
- if (IN_MULTICAST(ntohl(info->key.u.ipv4.dst))) {
+ if (ipv4_is_multicast(info->key.u.ipv4.dst)) {
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE],
"Remote IPv4 address cannot be Multicast");
return -EINVAL;
@@ -1373,8 +1470,8 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
"Remote IPv6 address cannot be Multicast");
return -EINVAL;
}
- info->key.tun_flags |= TUNNEL_CSUM;
- *use_udp6_rx_checksums = true;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
+ cfg->use_udp6_rx_checksums = true;
#else
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
"IPv6 support not enabled in the kernel");
@@ -1402,19 +1499,19 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
if (data[IFLA_GENEVE_TTL_INHERIT]) {
if (nla_get_u8(data[IFLA_GENEVE_TTL_INHERIT]))
- *ttl_inherit = true;
+ cfg->ttl_inherit = true;
else
- *ttl_inherit = false;
+ cfg->ttl_inherit = false;
} else if (data[IFLA_GENEVE_TTL]) {
info->key.ttl = nla_get_u8(data[IFLA_GENEVE_TTL]);
- *ttl_inherit = false;
+ cfg->ttl_inherit = false;
}
if (data[IFLA_GENEVE_TOS])
info->key.tos = nla_get_u8(data[IFLA_GENEVE_TOS]);
if (data[IFLA_GENEVE_DF])
- *df = nla_get_u8(data[IFLA_GENEVE_DF]);
+ cfg->df = nla_get_u8(data[IFLA_GENEVE_DF]);
if (data[IFLA_GENEVE_LABEL]) {
info->key.label = nla_get_be32(data[IFLA_GENEVE_LABEL]) &
@@ -1434,12 +1531,24 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
}
+ if (data[IFLA_GENEVE_PORT_RANGE]) {
+ const struct ifla_geneve_port_range *p;
+
+ if (changelink) {
+ attrtype = IFLA_GENEVE_PORT_RANGE;
+ goto change_notsup;
+ }
+ p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
+ cfg->port_min = ntohs(p->low);
+ cfg->port_max = ntohs(p->high);
+ }
+
if (data[IFLA_GENEVE_COLLECT_METADATA]) {
if (changelink) {
attrtype = IFLA_GENEVE_COLLECT_METADATA;
goto change_notsup;
}
- *metadata = true;
+ cfg->collect_md = true;
}
if (data[IFLA_GENEVE_UDP_CSUM]) {
@@ -1448,7 +1557,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
goto change_notsup;
}
if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
- info->key.tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
}
if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
@@ -1458,7 +1567,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
goto change_notsup;
}
if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
- info->key.tun_flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
#else
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
"IPv6 support not enabled in the kernel");
@@ -1473,7 +1582,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
goto change_notsup;
}
if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX]))
- *use_udp6_rx_checksums = false;
+ cfg->use_udp6_rx_checksums = false;
#else
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_RX],
"IPv6 support not enabled in the kernel");
@@ -1481,10 +1590,18 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
#endif
}
+ if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
+ goto change_notsup;
+ }
+ cfg->inner_proto_inherit = true;
+ }
+
return 0;
change_notsup:
NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
- "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
+ "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
return -EOPNOTSUPP;
}
@@ -1512,9 +1629,13 @@ static void geneve_link_config(struct net_device *dev,
}
#if IS_ENABLED(CONFIG_IPV6)
case AF_INET6: {
- struct rt6_info *rt = rt6_lookup(geneve->net,
- &info->key.u.ipv6.dst, NULL, 0,
- NULL, 0);
+ struct rt6_info *rt;
+
+ if (!__in6_dev_get(dev))
+ break;
+
+ rt = rt6_lookup(geneve->net, &info->key.u.ipv6.dst, NULL, 0,
+ NULL, 0);
if (rt && rt->dst.dev)
ldev_mtu = rt->dst.dev->mtu - GENEVE_IPV6_HLEN;
@@ -1530,29 +1651,33 @@ static void geneve_link_config(struct net_device *dev,
geneve_change_mtu(dev, ldev_mtu - info->options_len);
}
-static int geneve_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int geneve_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
- enum ifla_geneve_df df = GENEVE_DF_UNSET;
- bool use_udp6_rx_checksums = false;
- struct ip_tunnel_info info;
- bool ttl_inherit = false;
- bool metadata = false;
+ struct net *link_net = rtnl_newlink_link_net(params);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
+ struct geneve_config cfg = {
+ .df = GENEVE_DF_UNSET,
+ .use_udp6_rx_checksums = false,
+ .ttl_inherit = false,
+ .collect_md = false,
+ .port_min = 1,
+ .port_max = USHRT_MAX,
+ };
int err;
- init_tnl_info(&info, GENEVE_UDP_PORT);
- err = geneve_nl2info(tb, data, extack, &info, &metadata,
- &use_udp6_rx_checksums, &ttl_inherit, &df, false);
+ init_tnl_info(&cfg.info, GENEVE_UDP_PORT);
+ err = geneve_nl2info(tb, data, extack, &cfg, false);
if (err)
return err;
- err = geneve_configure(net, dev, extack, &info, metadata,
- use_udp6_rx_checksums, ttl_inherit, df);
+ err = geneve_configure(link_net, dev, extack, &cfg);
if (err)
return err;
- geneve_link_config(dev, &info, tb);
+ geneve_link_config(dev, &cfg.info, tb);
return 0;
}
@@ -1608,39 +1733,28 @@ static int geneve_changelink(struct net_device *dev, struct nlattr *tb[],
{
struct geneve_dev *geneve = netdev_priv(dev);
struct geneve_sock *gs4, *gs6;
- struct ip_tunnel_info info;
- bool metadata;
- bool use_udp6_rx_checksums;
- enum ifla_geneve_df df;
- bool ttl_inherit;
+ struct geneve_config cfg;
int err;
/* If the geneve device is configured for metadata (or externally
* controlled, for example, OVS), then nothing can be changed.
*/
- if (geneve->collect_md)
+ if (geneve->cfg.collect_md)
return -EOPNOTSUPP;
/* Start with the existing info. */
- memcpy(&info, &geneve->info, sizeof(info));
- metadata = geneve->collect_md;
- use_udp6_rx_checksums = geneve->use_udp6_rx_checksums;
- ttl_inherit = geneve->ttl_inherit;
- err = geneve_nl2info(tb, data, extack, &info, &metadata,
- &use_udp6_rx_checksums, &ttl_inherit, &df, true);
+ memcpy(&cfg, &geneve->cfg, sizeof(cfg));
+ err = geneve_nl2info(tb, data, extack, &cfg, true);
if (err)
return err;
- if (!geneve_dst_addr_equal(&geneve->info, &info)) {
- dst_cache_reset(&info.dst_cache);
- geneve_link_config(dev, &info, tb);
+ if (!geneve_dst_addr_equal(&geneve->cfg.info, &cfg.info)) {
+ dst_cache_reset(&cfg.info.dst_cache);
+ geneve_link_config(dev, &cfg.info, tb);
}
geneve_quiesce(geneve, &gs4, &gs6);
- geneve->info = info;
- geneve->collect_md = metadata;
- geneve->use_udp6_rx_checksums = use_udp6_rx_checksums;
- geneve->ttl_inherit = ttl_inherit;
+ memcpy(&geneve->cfg, &cfg, sizeof(cfg));
geneve_unquiesce(geneve, gs4, gs6);
return 0;
@@ -1668,15 +1782,21 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
+ nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
+ nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */
0;
}
static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
{
struct geneve_dev *geneve = netdev_priv(dev);
- struct ip_tunnel_info *info = &geneve->info;
- bool ttl_inherit = geneve->ttl_inherit;
- bool metadata = geneve->collect_md;
+ struct ip_tunnel_info *info = &geneve->cfg.info;
+ bool ttl_inherit = geneve->cfg.ttl_inherit;
+ bool metadata = geneve->cfg.collect_md;
+ struct ifla_geneve_port_range ports = {
+ .low = htons(geneve->cfg.port_min),
+ .high = htons(geneve->cfg.port_max),
+ };
__u8 tmp_vni[3];
__u32 vni;
@@ -1690,7 +1810,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
info->key.u.ipv4.dst))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
- !!(info->key.tun_flags & TUNNEL_CSUM)))
+ test_bit(IP_TUNNEL_CSUM_BIT,
+ info->key.tun_flags)))
goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6)
@@ -1699,7 +1820,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
&info->key.u.ipv6.dst))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
- !(info->key.tun_flags & TUNNEL_CSUM)))
+ !test_bit(IP_TUNNEL_CSUM_BIT,
+ info->key.tun_flags)))
goto nla_put_failure;
#endif
}
@@ -1709,7 +1831,7 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_be32(skb, IFLA_GENEVE_LABEL, info->key.label))
goto nla_put_failure;
- if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->df))
+ if (nla_put_u8(skb, IFLA_GENEVE_DF, geneve->cfg.df))
goto nla_put_failure;
if (nla_put_be16(skb, IFLA_GENEVE_PORT, info->key.tp_dst))
@@ -1720,13 +1842,20 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
#if IS_ENABLED(CONFIG_IPV6)
if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_RX,
- !geneve->use_udp6_rx_checksums))
+ !geneve->cfg.use_udp6_rx_checksums))
goto nla_put_failure;
#endif
if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
goto nla_put_failure;
+ if (geneve->cfg.inner_proto_inherit &&
+ nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
+ goto nla_put_failure;
+
+ if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1751,10 +1880,17 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
u8 name_assign_type, u16 dst_port)
{
struct nlattr *tb[IFLA_MAX + 1];
- struct ip_tunnel_info info;
struct net_device *dev;
LIST_HEAD(list_kill);
int err;
+ struct geneve_config cfg = {
+ .df = GENEVE_DF_UNSET,
+ .use_udp6_rx_checksums = true,
+ .ttl_inherit = false,
+ .collect_md = true,
+ .port_min = 1,
+ .port_max = USHRT_MAX,
+ };
memset(tb, 0, sizeof(tb));
dev = rtnl_create_link(net, name, name_assign_type,
@@ -1762,9 +1898,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
if (IS_ERR(dev))
return dev;
- init_tnl_info(&info, dst_port);
- err = geneve_configure(net, dev, NULL, &info,
- true, true, false, GENEVE_DF_UNSET);
+ init_tnl_info(&cfg.info, dst_port);
+ err = geneve_configure(net, dev, NULL, &cfg);
if (err) {
free_netdev(dev);
return ERR_PTR(err);
@@ -1777,7 +1912,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
if (err)
goto err;
- err = rtnl_configure_link(dev, NULL);
+ err = rtnl_configure_link(dev, NULL, 0, NULL);
if (err < 0)
goto err;
@@ -1794,14 +1929,10 @@ static int geneve_netdevice_event(struct notifier_block *unused,
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- if (event == NETDEV_UDP_TUNNEL_PUSH_INFO ||
- event == NETDEV_UDP_TUNNEL_DROP_INFO) {
- geneve_offload_rx_ports(dev, event == NETDEV_UDP_TUNNEL_PUSH_INFO);
- } else if (event == NETDEV_UNREGISTER) {
- geneve_offload_rx_ports(dev, false);
- } else if (event == NETDEV_REGISTER) {
+ if (event == NETDEV_UDP_TUNNEL_PUSH_INFO)
geneve_offload_rx_ports(dev, true);
- }
+ else if (event == NETDEV_UDP_TUNNEL_DROP_INFO)
+ geneve_offload_rx_ports(dev, false);
return NOTIFY_DONE;
}
@@ -1819,46 +1950,27 @@ static __net_init int geneve_init_net(struct net *net)
return 0;
}
-static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit geneve_exit_rtnl_net(struct net *net,
+ struct list_head *dev_to_kill)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *geneve, *next;
- struct net_device *dev, *aux;
-
- /* gather any geneve devices that were moved into this ns */
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &geneve_link_ops)
- unregister_netdevice_queue(dev, head);
- /* now gather any other geneve devices that were created in this ns */
- list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
- /* If geneve->dev is in the same netns, it was already added
- * to the list by the previous loop.
- */
- if (!net_eq(dev_net(geneve->dev), net))
- unregister_netdevice_queue(geneve->dev, head);
- }
-
- WARN_ON_ONCE(!list_empty(&gn->sock_list));
+ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
+ geneve_dellink(geneve->dev, dev_to_kill);
}
-static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
+static void __net_exit geneve_exit_net(struct net *net)
{
- struct net *net;
- LIST_HEAD(list);
+ const struct geneve_net *gn = net_generic(net, geneve_net_id);
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
- geneve_destroy_tunnels(net, &list);
-
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
+ WARN_ON_ONCE(!list_empty(&gn->sock_list));
}
static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
- .exit_batch = geneve_exit_batch_net,
+ .exit_rtnl = geneve_exit_rtnl_net,
+ .exit = geneve_exit_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
};