summaryrefslogtreecommitdiff
path: root/drivers/net/vxlan/vxlan_core.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/vxlan/vxlan_core.c')
-rw-r--r--drivers/net/vxlan/vxlan_core.c222
1 files changed, 153 insertions, 69 deletions
diff --git a/drivers/net/vxlan/vxlan_core.c b/drivers/net/vxlan/vxlan_core.c
index 78744549c1b3..e463f59e95c2 100644
--- a/drivers/net/vxlan/vxlan_core.c
+++ b/drivers/net/vxlan/vxlan_core.c
@@ -623,6 +623,32 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
return 1;
}
+static bool vxlan_parse_gpe_proto(struct vxlanhdr *hdr, __be16 *protocol)
+{
+ struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)hdr;
+
+ /* Need to have Next Protocol set for interfaces in GPE mode. */
+ if (!gpe->np_applied)
+ return false;
+ /* "The initial version is 0. If a receiver does not support the
+ * version indicated it MUST drop the packet.
+ */
+ if (gpe->version != 0)
+ return false;
+ /* "When the O bit is set to 1, the packet is an OAM packet and OAM
+ * processing MUST occur." However, we don't implement OAM
+ * processing, thus drop the packet.
+ */
+ if (gpe->oam_flag)
+ return false;
+
+ *protocol = tun_p_to_eth_p(gpe->next_protocol);
+ if (!*protocol)
+ return false;
+
+ return true;
+}
+
static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
unsigned int off,
struct vxlanhdr *vh, size_t hdrlen,
@@ -649,26 +675,24 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
return vh;
}
-static struct sk_buff *vxlan_gro_receive(struct sock *sk,
- struct list_head *head,
- struct sk_buff *skb)
+static struct vxlanhdr *vxlan_gro_prepare_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb,
+ struct gro_remcsum *grc)
{
- struct sk_buff *pp = NULL;
struct sk_buff *p;
struct vxlanhdr *vh, *vh2;
unsigned int hlen, off_vx;
- int flush = 1;
struct vxlan_sock *vs = rcu_dereference_sk_user_data(sk);
__be32 flags;
- struct gro_remcsum grc;
- skb_gro_remcsum_init(&grc);
+ skb_gro_remcsum_init(grc);
off_vx = skb_gro_offset(skb);
hlen = off_vx + sizeof(*vh);
vh = skb_gro_header(skb, hlen, off_vx);
if (unlikely(!vh))
- goto out;
+ return NULL;
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
@@ -676,12 +700,12 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
- vh->vx_vni, &grc,
+ vh->vx_vni, grc,
!!(vs->flags &
VXLAN_F_REMCSUM_NOPARTIAL));
if (!vh)
- goto out;
+ return NULL;
}
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
@@ -698,12 +722,48 @@ static struct sk_buff *vxlan_gro_receive(struct sock *sk,
}
}
- pp = call_gro_receive(eth_gro_receive, head, skb);
- flush = 0;
+ return vh;
+}
-out:
+static struct sk_buff *vxlan_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ struct sk_buff *pp = NULL;
+ struct gro_remcsum grc;
+ int flush = 1;
+
+ if (vxlan_gro_prepare_receive(sk, head, skb, &grc)) {
+ pp = call_gro_receive(eth_gro_receive, head, skb);
+ flush = 0;
+ }
skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
+ return pp;
+}
+static struct sk_buff *vxlan_gpe_gro_receive(struct sock *sk,
+ struct list_head *head,
+ struct sk_buff *skb)
+{
+ const struct packet_offload *ptype;
+ struct sk_buff *pp = NULL;
+ struct gro_remcsum grc;
+ struct vxlanhdr *vh;
+ __be16 protocol;
+ int flush = 1;
+
+ vh = vxlan_gro_prepare_receive(sk, head, skb, &grc);
+ if (vh) {
+ if (!vxlan_parse_gpe_proto(vh, &protocol))
+ goto out;
+ ptype = gro_find_receive_by_type(protocol);
+ if (!ptype)
+ goto out;
+ pp = call_gro_receive(ptype->callbacks.gro_receive, head, skb);
+ flush = 0;
+ }
+out:
+ skb_gro_flush_final_remcsum(skb, pp, flush, &grc);
return pp;
}
@@ -715,6 +775,21 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
}
+static int vxlan_gpe_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
+{
+ struct vxlanhdr *vh = (struct vxlanhdr *)(skb->data + nhoff);
+ const struct packet_offload *ptype;
+ int err = -ENOSYS;
+ __be16 protocol;
+
+ if (!vxlan_parse_gpe_proto(vh, &protocol))
+ return err;
+ ptype = gro_find_complete_by_type(protocol);
+ if (ptype)
+ err = ptype->callbacks.gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
+ return err;
+}
+
static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
__u16 state, __be32 src_vni,
__u16 ndm_flags)
@@ -1525,35 +1600,6 @@ out:
unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
-static bool vxlan_parse_gpe_hdr(struct vxlanhdr *unparsed,
- __be16 *protocol,
- struct sk_buff *skb, u32 vxflags)
-{
- struct vxlanhdr_gpe *gpe = (struct vxlanhdr_gpe *)unparsed;
-
- /* Need to have Next Protocol set for interfaces in GPE mode. */
- if (!gpe->np_applied)
- return false;
- /* "The initial version is 0. If a receiver does not support the
- * version indicated it MUST drop the packet.
- */
- if (gpe->version != 0)
- return false;
- /* "When the O bit is set to 1, the packet is an OAM packet and OAM
- * processing MUST occur." However, we don't implement OAM
- * processing, thus drop the packet.
- */
- if (gpe->oam_flag)
- return false;
-
- *protocol = tun_p_to_eth_p(gpe->next_protocol);
- if (!*protocol)
- return false;
-
- unparsed->vx_flags &= ~VXLAN_GPE_USED_BITS;
- return true;
-}
-
static bool vxlan_set_mac(struct vxlan_dev *vxlan,
struct vxlan_sock *vs,
struct sk_buff *skb, __be32 vni)
@@ -1655,8 +1701,9 @@ static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
* used by VXLAN extensions if explicitly requested.
*/
if (vs->flags & VXLAN_F_GPE) {
- if (!vxlan_parse_gpe_hdr(&unparsed, &protocol, skb, vs->flags))
+ if (!vxlan_parse_gpe_proto(&unparsed, &protocol))
goto drop;
+ unparsed.vx_flags &= ~VXLAN_GPE_USED_BITS;
raw_proto = true;
}
@@ -2281,14 +2328,11 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
struct vxlan_dev *dst_vxlan, __be32 vni,
bool snoop)
{
- struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
union vxlan_addr *remote_ip = &dst_vxlan->default_dst.remote_ip;
struct net_device *dev;
int len = skb->len;
- tx_stats = this_cpu_ptr(src_vxlan->dev->tstats);
- rx_stats = this_cpu_ptr(dst_vxlan->dev->tstats);
skb->pkt_type = PACKET_HOST;
skb->encapsulation = 0;
skb->dev = dst_vxlan->dev;
@@ -2314,17 +2358,11 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
- u64_stats_update_begin(&tx_stats->syncp);
- u64_stats_inc(&tx_stats->tx_packets);
- u64_stats_add(&tx_stats->tx_bytes, len);
- u64_stats_update_end(&tx_stats->syncp);
+ dev_sw_netstats_tx_add(src_vxlan->dev, 1, len);
vxlan_vnifilter_count(src_vxlan, vni, NULL, VXLAN_VNI_STATS_TX, len);
if (__netif_rx(skb) == NET_RX_SUCCESS) {
- u64_stats_update_begin(&rx_stats->syncp);
- u64_stats_inc(&rx_stats->rx_packets);
- u64_stats_add(&rx_stats->rx_bytes, len);
- u64_stats_update_end(&rx_stats->syncp);
+ dev_sw_netstats_rx_add(dst_vxlan->dev, len);
vxlan_vnifilter_count(dst_vxlan, vni, NULL, VXLAN_VNI_STATS_RX,
len);
} else {
@@ -2516,7 +2554,7 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ndst = &rt->dst;
- err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+ err = skb_tunnel_check_pmtu(skb, ndst, vxlan_headroom(flags & VXLAN_F_GPE),
netif_is_any_bridge_port(dev));
if (err < 0) {
goto tx_error;
@@ -2577,7 +2615,8 @@ void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto out_unlock;
}
- err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+ err = skb_tunnel_check_pmtu(skb, ndst,
+ vxlan_headroom((flags & VXLAN_F_GPE) | VXLAN_F_IPV6),
netif_is_any_bridge_port(dev));
if (err < 0) {
goto tx_error;
@@ -2672,6 +2711,45 @@ drop:
dev_kfree_skb(skb);
}
+static netdev_tx_t vxlan_xmit_nhid(struct sk_buff *skb, struct net_device *dev,
+ u32 nhid, __be32 vni)
+{
+ struct vxlan_dev *vxlan = netdev_priv(dev);
+ struct vxlan_rdst nh_rdst;
+ struct nexthop *nh;
+ bool do_xmit;
+ u32 hash;
+
+ memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
+ hash = skb_get_hash(skb);
+
+ rcu_read_lock();
+ nh = nexthop_find_by_id(dev_net(dev), nhid);
+ if (unlikely(!nh || !nexthop_is_fdb(nh) || !nexthop_is_multipath(nh))) {
+ rcu_read_unlock();
+ goto drop;
+ }
+ do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
+ rcu_read_unlock();
+
+ if (vxlan->cfg.saddr.sa.sa_family != nh_rdst.remote_ip.sa.sa_family)
+ goto drop;
+
+ if (likely(do_xmit))
+ vxlan_xmit_one(skb, dev, vni, &nh_rdst, false);
+ else
+ goto drop;
+
+ return NETDEV_TX_OK;
+
+drop:
+ dev->stats.tx_dropped++;
+ vxlan_vnifilter_count(netdev_priv(dev), vni, NULL,
+ VXLAN_VNI_STATS_TX_DROPS, 0);
+ dev_kfree_skb(skb);
+ return NETDEV_TX_OK;
+}
+
/* Transmit local packets over Vxlan
*
* Outer IP header inherits ECN and DF from inner header.
@@ -2687,6 +2765,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
struct vxlan_fdb *f;
struct ethhdr *eth;
__be32 vni = 0;
+ u32 nhid = 0;
info = skb_tunnel_info(skb);
@@ -2696,6 +2775,7 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
if (info && info->mode & IP_TUNNEL_INFO_BRIDGE &&
info->mode & IP_TUNNEL_INFO_TX) {
vni = tunnel_id_to_key32(info->key.tun_id);
+ nhid = info->key.nhid;
} else {
if (info && info->mode & IP_TUNNEL_INFO_TX)
vxlan_xmit_one(skb, dev, vni, NULL, false);
@@ -2723,6 +2803,9 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
#endif
}
+ if (nhid)
+ return vxlan_xmit_nhid(skb, dev, nhid, vni);
+
if (vxlan->cfg.flags & VXLAN_F_MDB) {
struct vxlan_mdb_entry *mdb_entry;
@@ -2989,14 +3072,12 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
struct vxlan_rdst *dst = &vxlan->default_dst;
struct net_device *lowerdev = __dev_get_by_index(vxlan->net,
dst->remote_ifindex);
- bool use_ipv6 = !!(vxlan->cfg.flags & VXLAN_F_IPV6);
/* This check is different than dev->max_mtu, because it looks at
* the lowerdev->mtu, rather than the static dev->max_mtu
*/
if (lowerdev) {
- int max_mtu = lowerdev->mtu -
- (use_ipv6 ? VXLAN6_HEADROOM : VXLAN_HEADROOM);
+ int max_mtu = lowerdev->mtu - vxlan_headroom(vxlan->cfg.flags);
if (new_mtu > max_mtu)
return -EINVAL;
}
@@ -3379,8 +3460,13 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
tunnel_cfg.encap_rcv = vxlan_rcv;
tunnel_cfg.encap_err_lookup = vxlan_err_lookup;
tunnel_cfg.encap_destroy = NULL;
- tunnel_cfg.gro_receive = vxlan_gro_receive;
- tunnel_cfg.gro_complete = vxlan_gro_complete;
+ if (vs->flags & VXLAN_F_GPE) {
+ tunnel_cfg.gro_receive = vxlan_gpe_gro_receive;
+ tunnel_cfg.gro_complete = vxlan_gpe_gro_complete;
+ } else {
+ tunnel_cfg.gro_receive = vxlan_gro_receive;
+ tunnel_cfg.gro_complete = vxlan_gro_complete;
+ }
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
@@ -3644,11 +3730,11 @@ static void vxlan_config_apply(struct net_device *dev,
struct vxlan_dev *vxlan = netdev_priv(dev);
struct vxlan_rdst *dst = &vxlan->default_dst;
unsigned short needed_headroom = ETH_HLEN;
- bool use_ipv6 = !!(conf->flags & VXLAN_F_IPV6);
int max_mtu = ETH_MAX_MTU;
+ u32 flags = conf->flags;
if (!changelink) {
- if (conf->flags & VXLAN_F_GPE)
+ if (flags & VXLAN_F_GPE)
vxlan_raw_setup(dev);
else
vxlan_ether_setup(dev);
@@ -3673,8 +3759,7 @@ static void vxlan_config_apply(struct net_device *dev,
dev->needed_tailroom = lowerdev->needed_tailroom;
- max_mtu = lowerdev->mtu - (use_ipv6 ? VXLAN6_HEADROOM :
- VXLAN_HEADROOM);
+ max_mtu = lowerdev->mtu - vxlan_headroom(flags);
if (max_mtu < ETH_MIN_MTU)
max_mtu = ETH_MIN_MTU;
@@ -3685,10 +3770,9 @@ static void vxlan_config_apply(struct net_device *dev,
if (dev->mtu > max_mtu)
dev->mtu = max_mtu;
- if (use_ipv6 || conf->flags & VXLAN_F_COLLECT_METADATA)
- needed_headroom += VXLAN6_HEADROOM;
- else
- needed_headroom += VXLAN_HEADROOM;
+ if (flags & VXLAN_F_COLLECT_METADATA)
+ flags |= VXLAN_F_IPV6;
+ needed_headroom += vxlan_headroom(flags);
dev->needed_headroom = needed_headroom;
memcpy(&vxlan->cfg, conf, sizeof(*conf));