summaryrefslogtreecommitdiff
path: root/drivers/net/geneve.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/geneve.c')
-rw-r--r--drivers/net/geneve.c544
1 files changed, 290 insertions, 254 deletions
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index c1fdd721a730..77b0c3d52041 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -18,6 +18,7 @@
#include <net/rtnetlink.h>
#include <net/geneve.h>
#include <net/gro.h>
+#include <net/netdev_lock.h>
#include <net/protocol.h>
#define GENEVE_NETDEV_VER "0.6"
@@ -40,6 +41,7 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");
/* per-network namespace private data for this module */
struct geneve_net {
struct list_head geneve_list;
+ /* sock_list is protected by rtnl lock */
struct list_head sock_list;
};
@@ -51,11 +53,16 @@ struct geneve_dev_node {
};
struct geneve_config {
- struct ip_tunnel_info info;
bool collect_md;
bool use_udp6_rx_checksums;
bool ttl_inherit;
enum ifla_geneve_df df;
+ bool inner_proto_inherit;
+ u16 port_min;
+ u16 port_max;
+
+ /* Must be last --ends in a flexible-array member. */
+ struct ip_tunnel_info info;
};
/* Pseudo network device */
@@ -220,53 +227,77 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
struct genevehdr *gnvh = geneve_hdr(skb);
struct metadata_dst *tun_dst = NULL;
unsigned int len;
- int err = 0;
+ int nh, err = 0;
void *oiph;
if (ip_tunnel_collect_metadata() || gs->collect_md) {
- __be16 flags;
+ IP_TUNNEL_DECLARE_FLAGS(flags) = { };
- flags = TUNNEL_KEY | (gnvh->oam ? TUNNEL_OAM : 0) |
- (gnvh->critical ? TUNNEL_CRIT_OPT : 0);
+ __set_bit(IP_TUNNEL_KEY_BIT, flags);
+ __assign_bit(IP_TUNNEL_OAM_BIT, flags, gnvh->oam);
+ __assign_bit(IP_TUNNEL_CRIT_OPT_BIT, flags, gnvh->critical);
tun_dst = udp_tun_rx_dst(skb, geneve_get_sk_family(gs), flags,
vni_to_tunnel_id(gnvh->vni),
gnvh->opt_len * 4);
if (!tun_dst) {
- geneve->dev->stats.rx_dropped++;
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
/* Update tunnel dst according to Geneve options. */
+ ip_tunnel_flags_zero(flags);
+ __set_bit(IP_TUNNEL_GENEVE_OPT_BIT, flags);
ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
gnvh->options, gnvh->opt_len * 4,
- TUNNEL_GENEVE_OPT);
+ flags);
} else {
/* Drop packets w/ critical options,
* since we don't support any...
*/
if (gnvh->critical) {
- geneve->dev->stats.rx_frame_errors++;
- geneve->dev->stats.rx_errors++;
+ DEV_STATS_INC(geneve->dev, rx_frame_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
goto drop;
}
}
- skb_reset_mac_header(skb);
- skb->protocol = eth_type_trans(skb, geneve->dev);
- skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
-
if (tun_dst)
skb_dst_set(skb, &tun_dst->dst);
- /* Ignore packet loops (and multicast echo) */
- if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) {
- geneve->dev->stats.rx_errors++;
- goto drop;
+ if (gnvh->proto_type == htons(ETH_P_TEB)) {
+ skb_reset_mac_header(skb);
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+
+ /* Ignore packet loops (and multicast echo) */
+ if (ether_addr_equal(eth_hdr(skb)->h_source,
+ geneve->dev->dev_addr)) {
+ DEV_STATS_INC(geneve->dev, rx_errors);
+ goto drop;
+ }
+ } else {
+ skb_reset_mac_header(skb);
+ skb->dev = geneve->dev;
+ skb->pkt_type = PACKET_HOST;
}
- oiph = skb_network_header(skb);
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(geneve->dev, rx_length_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ oiph = skb->head + nh;
+
if (geneve_get_sk_family(gs) == AF_INET)
err = IP_ECN_decapsulate(oiph, skb);
#if IS_ENABLED(CONFIG_IPV6)
@@ -288,8 +319,8 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
#endif
}
if (err > 1) {
- ++geneve->dev->stats.rx_frame_errors;
- ++geneve->dev->stats.rx_errors;
+ DEV_STATS_INC(geneve->dev, rx_frame_errors);
+ DEV_STATS_INC(geneve->dev, rx_errors);
goto drop;
}
}
@@ -297,7 +328,7 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs,
len = skb->len;
err = gro_cells_receive(&geneve->gro_cells, skb);
if (likely(err == NET_RX_SUCCESS))
- dev_sw_netstats_rx_add(geneve->dev, len);
+ dev_dstats_rx_add(geneve->dev, len);
return;
drop:
@@ -311,22 +342,16 @@ static int geneve_init(struct net_device *dev)
struct geneve_dev *geneve = netdev_priv(dev);
int err;
- dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);
- if (!dev->tstats)
- return -ENOMEM;
-
err = gro_cells_init(&geneve->gro_cells, dev);
- if (err) {
- free_percpu(dev->tstats);
+ if (err)
return err;
- }
err = dst_cache_init(&geneve->cfg.info.dst_cache, GFP_KERNEL);
if (err) {
- free_percpu(dev->tstats);
gro_cells_destroy(&geneve->gro_cells);
return err;
}
+ netdev_lockdep_set_classes(dev);
return 0;
}
@@ -336,7 +361,6 @@ static void geneve_uninit(struct net_device *dev)
dst_cache_destroy(&geneve->cfg.info.dst_cache);
gro_cells_destroy(&geneve->gro_cells);
- free_percpu(dev->tstats);
}
/* Callback from net/ipv4/udp.c to receive packets */
@@ -345,6 +369,7 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
struct genevehdr *geneveh;
struct geneve_dev *geneve;
struct geneve_sock *gs;
+ __be16 inner_proto;
int opts_len;
/* Need UDP and Geneve header to be present */
@@ -356,9 +381,6 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (unlikely(geneveh->ver != GENEVE_VER))
goto drop;
- if (unlikely(geneveh->proto_type != htons(ETH_P_TEB)))
- goto drop;
-
gs = rcu_dereference_sk_user_data(sk);
if (!gs)
goto drop;
@@ -367,11 +389,18 @@ static int geneve_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
if (!geneve)
goto drop;
+ inner_proto = geneveh->proto_type;
+
+ if (unlikely((!geneve->cfg.inner_proto_inherit &&
+ inner_proto != htons(ETH_P_TEB)))) {
+ dev_dstats_rx_dropped(geneve->dev);
+ goto drop;
+ }
+
opts_len = geneveh->opt_len * 4;
- if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len,
- htons(ETH_P_TEB),
+ if (iptunnel_pull_header(skb, GENEVE_BASE_HLEN + opts_len, inner_proto,
!net_eq(geneve->net, dev_net(geneve->dev)))) {
- geneve->dev->stats.rx_dropped++;
+ dev_dstats_rx_dropped(geneve->dev);
goto drop;
}
@@ -485,19 +514,16 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
off_gnv = skb_gro_offset(skb);
hlen = off_gnv + sizeof(*gh);
- gh = skb_gro_header_fast(skb, off_gnv);
- if (skb_gro_header_hard(skb, hlen)) {
- gh = skb_gro_header_slow(skb, hlen, off_gnv);
- if (unlikely(!gh))
- goto out;
- }
+ gh = skb_gro_header(skb, hlen, off_gnv);
+ if (unlikely(!gh))
+ goto out;
if (gh->ver != GENEVE_VER || gh->oam)
goto out;
gh_len = geneve_hlen(gh);
hlen = off_gnv + gh_len;
- if (skb_gro_header_hard(skb, hlen)) {
+ if (!skb_gro_may_pull(skb, hlen)) {
gh = skb_gro_header_slow(skb, hlen, off_gnv);
if (unlikely(!gh))
goto out;
@@ -515,14 +541,16 @@ static struct sk_buff *geneve_gro_receive(struct sock *sk,
}
}
+ skb_gro_pull(skb, gh_len);
+ skb_gro_postpull_rcsum(skb, gh, gh_len);
type = gh->proto_type;
+ if (likely(type == htons(ETH_P_TEB)))
+ return call_gro_receive(eth_gro_receive, head, skb);
ptype = gro_find_receive_by_type(type);
if (!ptype)
goto out;
- skb_gro_pull(skb, gh_len);
- skb_gro_postpull_rcsum(skb, gh, gh_len);
pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
flush = 0;
@@ -545,6 +573,10 @@ static int geneve_gro_complete(struct sock *sk, struct sk_buff *skb,
gh_len = geneve_hlen(gh);
type = gh->proto_type;
+ /* since skb->encapsulation is set, eth_gro_complete() sets the inner mac header */
+ if (likely(type == htons(ETH_P_TEB)))
+ return eth_gro_complete(skb, nhoff + gh_len);
+
ptype = gro_find_complete_by_type(type);
if (ptype)
err = ptype->callbacks.gro_complete(skb, nhoff + gh_len);
@@ -717,27 +749,31 @@ static int geneve_stop(struct net_device *dev)
}
static void geneve_build_header(struct genevehdr *geneveh,
- const struct ip_tunnel_info *info)
+ const struct ip_tunnel_info *info,
+ __be16 inner_proto)
{
geneveh->ver = GENEVE_VER;
geneveh->opt_len = info->options_len / 4;
- geneveh->oam = !!(info->key.tun_flags & TUNNEL_OAM);
- geneveh->critical = !!(info->key.tun_flags & TUNNEL_CRIT_OPT);
+ geneveh->oam = test_bit(IP_TUNNEL_OAM_BIT, info->key.tun_flags);
+ geneveh->critical = test_bit(IP_TUNNEL_CRIT_OPT_BIT,
+ info->key.tun_flags);
geneveh->rsvd1 = 0;
tunnel_id_to_vni(info->key.tun_id, geneveh->vni);
- geneveh->proto_type = htons(ETH_P_TEB);
+ geneveh->proto_type = inner_proto;
geneveh->rsvd2 = 0;
- if (info->key.tun_flags & TUNNEL_GENEVE_OPT)
+ if (test_bit(IP_TUNNEL_GENEVE_OPT_BIT, info->key.tun_flags))
ip_tunnel_info_opts_get(geneveh->options, info);
}
static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
const struct ip_tunnel_info *info,
- bool xnet, int ip_hdr_len)
+ bool xnet, int ip_hdr_len,
+ bool inner_proto_inherit)
{
- bool udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
+ bool udp_sum = test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
struct genevehdr *gnvh;
+ __be16 inner_proto;
int min_headroom;
int err;
@@ -755,8 +791,9 @@ static int geneve_build_skb(struct dst_entry *dst, struct sk_buff *skb,
goto free_dst;
gnvh = __skb_push(skb, sizeof(*gnvh) + info->options_len);
- geneve_build_header(gnvh, info);
- skb_set_inner_protocol(skb, htons(ETH_P_TEB));
+ inner_proto = inner_proto_inherit ? skb->protocol : htons(ETH_P_TEB);
+ geneve_build_header(gnvh, info, inner_proto);
+ skb_set_inner_protocol(skb, inner_proto);
return 0;
free_dst:
@@ -764,135 +801,55 @@ free_dst:
return err;
}
-static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
- struct net_device *dev,
- struct geneve_sock *gs4,
- struct flowi4 *fl4,
- const struct ip_tunnel_info *info,
- __be16 dport, __be16 sport)
+static u8 geneve_get_dsfield(struct sk_buff *skb, struct net_device *dev,
+ const struct ip_tunnel_info *info,
+ bool *use_cache)
{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_cache *dst_cache;
- struct rtable *rt = NULL;
- __u8 tos;
+ u8 dsfield;
- if (!gs4)
- return ERR_PTR(-EIO);
-
- memset(fl4, 0, sizeof(*fl4));
- fl4->flowi4_mark = skb->mark;
- fl4->flowi4_proto = IPPROTO_UDP;
- fl4->daddr = info->key.u.ipv4.dst;
- fl4->saddr = info->key.u.ipv4.src;
- fl4->fl4_dport = dport;
- fl4->fl4_sport = sport;
-
- tos = info->key.tos;
- if ((tos == 1) && !geneve->cfg.collect_md) {
- tos = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
- use_cache = false;
+ dsfield = info->key.tos;
+ if (dsfield == 1 && !geneve->cfg.collect_md) {
+ dsfield = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
+ *use_cache = false;
}
- fl4->flowi4_tos = RT_TOS(tos);
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- rt = dst_cache_get_ip4(dst_cache, &fl4->saddr);
- if (rt)
- return rt;
- }
- rt = ip_route_output_key(geneve->net, fl4);
- if (IS_ERR(rt)) {
- netdev_dbg(dev, "no route to %pI4\n", &fl4->daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (rt->dst.dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI4\n", &fl4->daddr);
- ip_rt_put(rt);
- return ERR_PTR(-ELOOP);
- }
- if (use_cache)
- dst_cache_set_ip4(dst_cache, &rt->dst, fl4->saddr);
- return rt;
+ return dsfield;
}
-#if IS_ENABLED(CONFIG_IPV6)
-static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
- struct net_device *dev,
- struct geneve_sock *gs6,
- struct flowi6 *fl6,
- const struct ip_tunnel_info *info,
- __be16 dport, __be16 sport)
-{
- bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
- struct geneve_dev *geneve = netdev_priv(dev);
- struct dst_entry *dst = NULL;
- struct dst_cache *dst_cache;
- __u8 prio;
-
- if (!gs6)
- return ERR_PTR(-EIO);
-
- memset(fl6, 0, sizeof(*fl6));
- fl6->flowi6_mark = skb->mark;
- fl6->flowi6_proto = IPPROTO_UDP;
- fl6->daddr = info->key.u.ipv6.dst;
- fl6->saddr = info->key.u.ipv6.src;
- fl6->fl6_dport = dport;
- fl6->fl6_sport = sport;
-
- prio = info->key.tos;
- if ((prio == 1) && !geneve->cfg.collect_md) {
- prio = ip_tunnel_get_dsfield(ip_hdr(skb), skb);
- use_cache = false;
- }
-
- fl6->flowlabel = ip6_make_flowinfo(RT_TOS(prio),
- info->key.label);
- dst_cache = (struct dst_cache *)&info->dst_cache;
- if (use_cache) {
- dst = dst_cache_get_ip6(dst_cache, &fl6->saddr);
- if (dst)
- return dst;
- }
- dst = ipv6_stub->ipv6_dst_lookup_flow(geneve->net, gs6->sock->sk, fl6,
- NULL);
- if (IS_ERR(dst)) {
- netdev_dbg(dev, "no route to %pI6\n", &fl6->daddr);
- return ERR_PTR(-ENETUNREACH);
- }
- if (dst->dev == dev) { /* is this necessary? */
- netdev_dbg(dev, "circular route to %pI6\n", &fl6->daddr);
- dst_release(dst);
- return ERR_PTR(-ELOOP);
- }
-
- if (use_cache)
- dst_cache_set_ip6(dst_cache, dst, &fl6->saddr);
- return dst;
-}
-#endif
-
static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_dev *geneve,
const struct ip_tunnel_info *info)
{
+ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
const struct ip_tunnel_key *key = &info->key;
struct rtable *rt;
- struct flowi4 fl4;
+ bool use_cache;
__u8 tos, ttl;
__be16 df = 0;
+ __be32 saddr;
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
return -EINVAL;
- sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
- rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
- geneve->cfg.info.key.tp_dst, sport);
+ if (!gs4)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ tos = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
+ &info->key,
+ sport, geneve->cfg.info.key.tp_dst, tos,
+ use_cache ?
+ (struct dst_cache *)&info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
@@ -915,8 +872,8 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return -ENOMEM;
}
- unclone->key.u.ipv4.dst = fl4.saddr;
- unclone->key.u.ipv4.src = fl4.daddr;
+ unclone->key.u.ipv4.dst = saddr;
+ unclone->key.u.ipv4.src = info->key.u.ipv4.dst;
}
if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -925,18 +882,18 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
}
skb->protocol = eth_type_trans(skb, geneve->dev);
- netif_rx(skb);
+ __netif_rx(skb);
dst_release(&rt->dst);
return -EMSGSIZE;
}
+ tos = ip_tunnel_ecn_encap(tos, ip_hdr(skb), skb);
if (geneve->cfg.collect_md) {
- tos = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
- df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0;
+ df = test_bit(IP_TUNNEL_DONT_FRAGMENT_BIT, key->tun_flags) ?
+ htons(IP_DF) : 0;
} else {
- tos = ip_tunnel_ecn_encap(fl4.flowi4_tos, ip_hdr(skb), skb);
if (geneve->cfg.ttl_inherit)
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
else
@@ -946,7 +903,7 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (geneve->cfg.df == GENEVE_DF_SET) {
df = htons(IP_DF);
} else if (geneve->cfg.df == GENEVE_DF_INHERIT) {
- struct ethhdr *eth = eth_hdr(skb);
+ struct ethhdr *eth = skb_eth_hdr(skb);
if (ntohs(eth->h_proto) == ETH_P_IPV6) {
df = htons(IP_DF);
@@ -959,14 +916,16 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
}
}
- err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr));
+ err = geneve_build_skb(&rt->dst, skb, info, xnet, sizeof(struct iphdr),
+ inner_proto_inherit);
if (unlikely(err))
return err;
- udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, fl4.saddr, fl4.daddr,
+ udp_tunnel_xmit_skb(rt, gs4->sock->sk, skb, saddr, info->key.u.ipv4.dst,
tos, ttl, df, sport, geneve->cfg.info.key.tp_dst,
!net_eq(geneve->net, dev_net(geneve->dev)),
- !(info->key.tun_flags & TUNNEL_CSUM));
+ !test_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags),
+ 0);
return 0;
}
@@ -975,21 +934,34 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
struct geneve_dev *geneve,
const struct ip_tunnel_info *info)
{
+ bool inner_proto_inherit = geneve->cfg.inner_proto_inherit;
bool xnet = !net_eq(geneve->net, dev_net(geneve->dev));
struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
const struct ip_tunnel_key *key = &info->key;
struct dst_entry *dst = NULL;
- struct flowi6 fl6;
+ struct in6_addr saddr;
+ bool use_cache;
__u8 prio, ttl;
__be16 sport;
int err;
- if (!pskb_inet_may_pull(skb))
+ if (skb_vlan_inet_prepare(skb, inner_proto_inherit))
return -EINVAL;
- sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
- dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
- geneve->cfg.info.key.tp_dst, sport);
+ if (!gs6)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ prio = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
+ &saddr, key, sport,
+ geneve->cfg.info.key.tp_dst, prio,
+ use_cache ?
+ (struct dst_cache *)&info->dst_cache : NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
@@ -1011,8 +983,8 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return -ENOMEM;
}
- unclone->key.u.ipv6.dst = fl6.saddr;
- unclone->key.u.ipv6.src = fl6.daddr;
+ unclone->key.u.ipv6.dst = saddr;
+ unclone->key.u.ipv6.src = info->key.u.ipv6.dst;
}
if (!pskb_may_pull(skb, ETH_HLEN)) {
@@ -1021,31 +993,32 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
}
skb->protocol = eth_type_trans(skb, geneve->dev);
- netif_rx(skb);
+ __netif_rx(skb);
dst_release(dst);
return -EMSGSIZE;
}
+ prio = ip_tunnel_ecn_encap(prio, ip_hdr(skb), skb);
if (geneve->cfg.collect_md) {
- prio = ip_tunnel_ecn_encap(key->tos, ip_hdr(skb), skb);
ttl = key->ttl;
} else {
- prio = ip_tunnel_ecn_encap(ip6_tclass(fl6.flowlabel),
- ip_hdr(skb), skb);
if (geneve->cfg.ttl_inherit)
ttl = ip_tunnel_get_ttl(ip_hdr(skb), skb);
else
ttl = key->ttl;
ttl = ttl ? : ip6_dst_hoplimit(dst);
}
- err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr));
+ err = geneve_build_skb(dst, skb, info, xnet, sizeof(struct ipv6hdr),
+ inner_proto_inherit);
if (unlikely(err))
return err;
udp_tunnel6_xmit_skb(dst, gs6->sock->sk, skb, dev,
- &fl6.saddr, &fl6.daddr, prio, ttl,
+ &saddr, &key->u.ipv6.dst, prio, ttl,
info->key.label, sport, geneve->cfg.info.key.tp_dst,
- !(info->key.tun_flags & TUNNEL_CSUM));
+ !test_bit(IP_TUNNEL_CSUM_BIT,
+ info->key.tun_flags),
+ 0);
return 0;
}
#endif
@@ -1061,7 +1034,7 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
if (unlikely(!info || !(info->mode & IP_TUNNEL_INFO_TX))) {
netdev_dbg(dev, "no tunnel metadata\n");
dev_kfree_skb(skb);
- dev->stats.tx_dropped++;
+ dev_dstats_tx_dropped(dev);
return NETDEV_TX_OK;
}
} else {
@@ -1084,11 +1057,11 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
dev_kfree_skb(skb);
if (err == -ELOOP)
- dev->stats.collisions++;
+ DEV_STATS_INC(dev, collisions);
else if (err == -ENETUNREACH)
- dev->stats.tx_carrier_errors++;
+ DEV_STATS_INC(dev, tx_carrier_errors);
- dev->stats.tx_errors++;
+ DEV_STATS_INC(dev, tx_errors);
return NETDEV_TX_OK;
}
@@ -1099,7 +1072,7 @@ static int geneve_change_mtu(struct net_device *dev, int new_mtu)
else if (new_mtu < dev->min_mtu)
new_mtu = dev->min_mtu;
- dev->mtu = new_mtu;
+ WRITE_ONCE(dev->mtu, new_mtu);
return 0;
}
@@ -1111,35 +1084,56 @@ static int geneve_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
if (ip_tunnel_info_af(info) == AF_INET) {
struct rtable *rt;
- struct flowi4 fl4;
-
struct geneve_sock *gs4 = rcu_dereference(geneve->sock4);
- sport = udp_flow_src_port(geneve->net, skb,
- 1, USHRT_MAX, true);
+ bool use_cache;
+ __be32 saddr;
+ u8 tos;
+
+ if (!gs4)
+ return -EIO;
- rt = geneve_get_v4_rt(skb, dev, gs4, &fl4, info,
- geneve->cfg.info.key.tp_dst, sport);
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ tos = geneve_get_dsfield(skb, dev, info, &use_cache);
+ sport = udp_flow_src_port(geneve->net, skb,
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
+
+ rt = udp_tunnel_dst_lookup(skb, dev, geneve->net, 0, &saddr,
+ &info->key,
+ sport, geneve->cfg.info.key.tp_dst,
+ tos,
+ use_cache ? &info->dst_cache : NULL);
if (IS_ERR(rt))
return PTR_ERR(rt);
ip_rt_put(rt);
- info->key.u.ipv4.src = fl4.saddr;
+ info->key.u.ipv4.src = saddr;
#if IS_ENABLED(CONFIG_IPV6)
} else if (ip_tunnel_info_af(info) == AF_INET6) {
struct dst_entry *dst;
- struct flowi6 fl6;
-
struct geneve_sock *gs6 = rcu_dereference(geneve->sock6);
+ struct in6_addr saddr;
+ bool use_cache;
+ u8 prio;
+
+ if (!gs6)
+ return -EIO;
+
+ use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ prio = geneve_get_dsfield(skb, dev, info, &use_cache);
sport = udp_flow_src_port(geneve->net, skb,
- 1, USHRT_MAX, true);
+ geneve->cfg.port_min,
+ geneve->cfg.port_max, true);
- dst = geneve_get_v6_dst(skb, dev, gs6, &fl6, info,
- geneve->cfg.info.key.tp_dst, sport);
+ dst = udp_tunnel6_dst_lookup(skb, dev, geneve->net, gs6->sock, 0,
+ &saddr, &info->key, sport,
+ geneve->cfg.info.key.tp_dst, prio,
+ use_cache ? &info->dst_cache : NULL);
if (IS_ERR(dst))
return PTR_ERR(dst);
dst_release(dst);
- info->key.u.ipv6.src = fl6.saddr;
+ info->key.u.ipv6.src = saddr;
#endif
} else {
return -EINVAL;
@@ -1156,7 +1150,6 @@ static const struct net_device_ops geneve_netdev_ops = {
.ndo_open = geneve_open,
.ndo_stop = geneve_stop,
.ndo_start_xmit = geneve_xmit,
- .ndo_get_stats64 = dev_get_tstats64,
.ndo_change_mtu = geneve_change_mtu,
.ndo_validate_addr = eth_validate_addr,
.ndo_set_mac_address = eth_mac_addr,
@@ -1166,8 +1159,8 @@ static const struct net_device_ops geneve_netdev_ops = {
static void geneve_get_drvinfo(struct net_device *dev,
struct ethtool_drvinfo *drvinfo)
{
- strlcpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
- strlcpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
+ strscpy(drvinfo->version, GENEVE_NETDEV_VER, sizeof(drvinfo->version));
+ strscpy(drvinfo->driver, "geneve", sizeof(drvinfo->driver));
}
static const struct ethtool_ops geneve_ethtool_ops = {
@@ -1176,7 +1169,7 @@ static const struct ethtool_ops geneve_ethtool_ops = {
};
/* Info for udev, that this is a virtual tunnel endpoint */
-static struct device_type geneve_type = {
+static const struct device_type geneve_type = {
.name = "geneve",
};
@@ -1190,8 +1183,9 @@ static void geneve_offload_rx_ports(struct net_device *dev, bool push)
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_sock *gs;
- rcu_read_lock();
- list_for_each_entry_rcu(gs, &gn->sock_list, list) {
+ ASSERT_RTNL();
+
+ list_for_each_entry(gs, &gn->sock_list, list) {
if (push) {
udp_tunnel_push_rx_port(dev, gs->sock,
UDP_TUNNEL_TYPE_GENEVE);
@@ -1200,7 +1194,6 @@ static void geneve_offload_rx_ports(struct net_device *dev, bool push)
UDP_TUNNEL_TYPE_GENEVE);
}
}
- rcu_read_unlock();
}
/* Initialize the device structure. */
@@ -1214,7 +1207,6 @@ static void geneve_setup(struct net_device *dev)
SET_NETDEV_DEVTYPE(dev, &geneve_type);
- dev->features |= NETIF_F_LLTX;
dev->features |= NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_FRAGLIST;
dev->features |= NETIF_F_RXCSUM;
dev->features |= NETIF_F_GSO_SOFTWARE;
@@ -1223,6 +1215,7 @@ static void geneve_setup(struct net_device *dev)
dev->hw_features |= NETIF_F_RXCSUM;
dev->hw_features |= NETIF_F_GSO_SOFTWARE;
+ dev->pcpu_stat_type = NETDEV_PCPU_STAT_DSTATS;
/* MTU range: 68 - (something less than 65535) */
dev->min_mtu = ETH_MIN_MTU;
/* The max_mtu calculation does not take account of GENEVE
@@ -1234,10 +1227,12 @@ static void geneve_setup(struct net_device *dev)
netif_keep_dst(dev);
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_NO_QUEUE;
+ dev->lltx = true;
eth_hw_addr_random(dev);
}
static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
+ [IFLA_GENEVE_UNSPEC] = { .strict_start_type = IFLA_GENEVE_INNER_PROTO_INHERIT },
[IFLA_GENEVE_ID] = { .type = NLA_U32 },
[IFLA_GENEVE_REMOTE] = { .len = sizeof_field(struct iphdr, daddr) },
[IFLA_GENEVE_REMOTE6] = { .len = sizeof(struct in6_addr) },
@@ -1251,6 +1246,8 @@ static const struct nla_policy geneve_policy[IFLA_GENEVE_MAX + 1] = {
[IFLA_GENEVE_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 },
[IFLA_GENEVE_TTL_INHERIT] = { .type = NLA_U8 },
[IFLA_GENEVE_DF] = { .type = NLA_U8 },
+ [IFLA_GENEVE_INNER_PROTO_INHERIT] = { .type = NLA_FLAG },
+ [IFLA_GENEVE_PORT_RANGE] = NLA_POLICY_EXACT_LEN(sizeof(struct ifla_geneve_port_range)),
};
static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
@@ -1296,6 +1293,17 @@ static int geneve_validate(struct nlattr *tb[], struct nlattr *data[],
}
}
+ if (data[IFLA_GENEVE_PORT_RANGE]) {
+ const struct ifla_geneve_port_range *p;
+
+ p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
+ if (ntohs(p->high) < ntohs(p->low)) {
+ NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_PORT_RANGE],
+ "Invalid source port range");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -1323,7 +1331,8 @@ static struct geneve_dev *geneve_find_dev(struct geneve_net *gn,
static bool is_tnl_info_zero(const struct ip_tunnel_info *info)
{
- return !(info->key.tun_id || info->key.tun_flags || info->key.tos ||
+ return !(info->key.tun_id || info->key.tos ||
+ !ip_tunnel_flags_empty(info->key.tun_flags) ||
info->key.ttl || info->key.label || info->key.tp_src ||
memchr_inv(&info->key.u, 0, sizeof(info->key.u)));
}
@@ -1388,6 +1397,14 @@ static int geneve_configure(struct net *net, struct net_device *dev,
dst_cache_reset(&geneve->cfg.info.dst_cache);
memcpy(&geneve->cfg, cfg, sizeof(*cfg));
+ if (geneve->cfg.inner_proto_inherit) {
+ dev->header_ops = NULL;
+ dev->type = ARPHRD_NONE;
+ dev->hard_header_len = 0;
+ dev->addr_len = 0;
+ dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+ }
+
err = register_netdevice(dev);
if (err)
return err;
@@ -1453,7 +1470,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
"Remote IPv6 address cannot be Multicast");
return -EINVAL;
}
- info->key.tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
cfg->use_udp6_rx_checksums = true;
#else
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_REMOTE6],
@@ -1514,6 +1531,18 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
info->key.tp_dst = nla_get_be16(data[IFLA_GENEVE_PORT]);
}
+ if (data[IFLA_GENEVE_PORT_RANGE]) {
+ const struct ifla_geneve_port_range *p;
+
+ if (changelink) {
+ attrtype = IFLA_GENEVE_PORT_RANGE;
+ goto change_notsup;
+ }
+ p = nla_data(data[IFLA_GENEVE_PORT_RANGE]);
+ cfg->port_min = ntohs(p->low);
+ cfg->port_max = ntohs(p->high);
+ }
+
if (data[IFLA_GENEVE_COLLECT_METADATA]) {
if (changelink) {
attrtype = IFLA_GENEVE_COLLECT_METADATA;
@@ -1528,7 +1557,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
goto change_notsup;
}
if (nla_get_u8(data[IFLA_GENEVE_UDP_CSUM]))
- info->key.tun_flags |= TUNNEL_CSUM;
+ __set_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
}
if (data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]) {
@@ -1538,7 +1567,7 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
goto change_notsup;
}
if (nla_get_u8(data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX]))
- info->key.tun_flags &= ~TUNNEL_CSUM;
+ __clear_bit(IP_TUNNEL_CSUM_BIT, info->key.tun_flags);
#else
NL_SET_ERR_MSG_ATTR(extack, data[IFLA_GENEVE_UDP_ZERO_CSUM6_TX],
"IPv6 support not enabled in the kernel");
@@ -1561,10 +1590,18 @@ static int geneve_nl2info(struct nlattr *tb[], struct nlattr *data[],
#endif
}
+ if (data[IFLA_GENEVE_INNER_PROTO_INHERIT]) {
+ if (changelink) {
+ attrtype = IFLA_GENEVE_INNER_PROTO_INHERIT;
+ goto change_notsup;
+ }
+ cfg->inner_proto_inherit = true;
+ }
+
return 0;
change_notsup:
NL_SET_ERR_MSG_ATTR(extack, data[attrtype],
- "Changing VNI, Port, endpoint IP address family, external, and UDP checksum attributes are not supported");
+ "Changing VNI, Port, endpoint IP address family, external, inner_proto_inherit, and UDP checksum attributes are not supported");
return -EOPNOTSUPP;
}
@@ -1614,15 +1651,20 @@ static void geneve_link_config(struct net_device *dev,
geneve_change_mtu(dev, ldev_mtu - info->options_len);
}
-static int geneve_newlink(struct net *net, struct net_device *dev,
- struct nlattr *tb[], struct nlattr *data[],
+static int geneve_newlink(struct net_device *dev,
+ struct rtnl_newlink_params *params,
struct netlink_ext_ack *extack)
{
+ struct net *link_net = rtnl_newlink_link_net(params);
+ struct nlattr **data = params->data;
+ struct nlattr **tb = params->tb;
struct geneve_config cfg = {
.df = GENEVE_DF_UNSET,
.use_udp6_rx_checksums = false,
.ttl_inherit = false,
.collect_md = false,
+ .port_min = 1,
+ .port_max = USHRT_MAX,
};
int err;
@@ -1631,7 +1673,7 @@ static int geneve_newlink(struct net *net, struct net_device *dev,
if (err)
return err;
- err = geneve_configure(net, dev, extack, &cfg);
+ err = geneve_configure(link_net, dev, extack, &cfg);
if (err)
return err;
@@ -1740,6 +1782,8 @@ static size_t geneve_get_size(const struct net_device *dev)
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_TX */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_UDP_ZERO_CSUM6_RX */
nla_total_size(sizeof(__u8)) + /* IFLA_GENEVE_TTL_INHERIT */
+ nla_total_size(0) + /* IFLA_GENEVE_INNER_PROTO_INHERIT */
+ nla_total_size(sizeof(struct ifla_geneve_port_range)) + /* IFLA_GENEVE_PORT_RANGE */
0;
}
@@ -1749,6 +1793,10 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
struct ip_tunnel_info *info = &geneve->cfg.info;
bool ttl_inherit = geneve->cfg.ttl_inherit;
bool metadata = geneve->cfg.collect_md;
+ struct ifla_geneve_port_range ports = {
+ .low = htons(geneve->cfg.port_min),
+ .high = htons(geneve->cfg.port_max),
+ };
__u8 tmp_vni[3];
__u32 vni;
@@ -1762,7 +1810,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
info->key.u.ipv4.dst))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_GENEVE_UDP_CSUM,
- !!(info->key.tun_flags & TUNNEL_CSUM)))
+ test_bit(IP_TUNNEL_CSUM_BIT,
+ info->key.tun_flags)))
goto nla_put_failure;
#if IS_ENABLED(CONFIG_IPV6)
@@ -1771,7 +1820,8 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
&info->key.u.ipv6.dst))
goto nla_put_failure;
if (nla_put_u8(skb, IFLA_GENEVE_UDP_ZERO_CSUM6_TX,
- !(info->key.tun_flags & TUNNEL_CSUM)))
+ !test_bit(IP_TUNNEL_CSUM_BIT,
+ info->key.tun_flags)))
goto nla_put_failure;
#endif
}
@@ -1799,6 +1849,13 @@ static int geneve_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u8(skb, IFLA_GENEVE_TTL_INHERIT, ttl_inherit))
goto nla_put_failure;
+ if (geneve->cfg.inner_proto_inherit &&
+ nla_put_flag(skb, IFLA_GENEVE_INNER_PROTO_INHERIT))
+ goto nla_put_failure;
+
+ if (nla_put(skb, IFLA_GENEVE_PORT_RANGE, sizeof(ports), &ports))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -1831,6 +1888,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
.use_udp6_rx_checksums = true,
.ttl_inherit = false,
.collect_md = true,
+ .port_min = 1,
+ .port_max = USHRT_MAX,
};
memset(tb, 0, sizeof(tb));
@@ -1853,7 +1912,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name,
if (err)
goto err;
- err = rtnl_configure_link(dev, NULL);
+ err = rtnl_configure_link(dev, NULL, 0, NULL);
if (err < 0)
goto err;
@@ -1891,50 +1950,27 @@ static __net_init int geneve_init_net(struct net *net)
return 0;
}
-static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
+static void __net_exit geneve_exit_rtnl_net(struct net *net,
+ struct list_head *dev_to_kill)
{
struct geneve_net *gn = net_generic(net, geneve_net_id);
struct geneve_dev *geneve, *next;
- struct net_device *dev, *aux;
-
- /* gather any geneve devices that were moved into this ns */
- for_each_netdev_safe(net, dev, aux)
- if (dev->rtnl_link_ops == &geneve_link_ops)
- unregister_netdevice_queue(dev, head);
- /* now gather any other geneve devices that were created in this ns */
- list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
- /* If geneve->dev is in the same netns, it was already added
- * to the list by the previous loop.
- */
- if (!net_eq(dev_net(geneve->dev), net))
- unregister_netdevice_queue(geneve->dev, head);
- }
+ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
+ geneve_dellink(geneve->dev, dev_to_kill);
}
-static void __net_exit geneve_exit_batch_net(struct list_head *net_list)
+static void __net_exit geneve_exit_net(struct net *net)
{
- struct net *net;
- LIST_HEAD(list);
-
- rtnl_lock();
- list_for_each_entry(net, net_list, exit_list)
- geneve_destroy_tunnels(net, &list);
+ const struct geneve_net *gn = net_generic(net, geneve_net_id);
- /* unregister the devices gathered above */
- unregister_netdevice_many(&list);
- rtnl_unlock();
-
- list_for_each_entry(net, net_list, exit_list) {
- const struct geneve_net *gn = net_generic(net, geneve_net_id);
-
- WARN_ON_ONCE(!list_empty(&gn->sock_list));
- }
+ WARN_ON_ONCE(!list_empty(&gn->sock_list));
}
static struct pernet_operations geneve_net_ops = {
.init = geneve_init_net,
- .exit_batch = geneve_exit_batch_net,
+ .exit_rtnl = geneve_exit_rtnl_net,
+ .exit = geneve_exit_net,
.id = &geneve_net_id,
.size = sizeof(struct geneve_net),
};