summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/bareudp.c5
-rw-r--r--drivers/net/geneve.c55
-rw-r--r--drivers/net/vxlan.c47
-rw-r--r--include/linux/netdevice.h5
-rw-r--r--include/net/dst.h10
-rw-r--r--include/net/ip_tunnels.h2
-rw-r--r--net/ipv4/ip_tunnel_core.c244
-rw-r--r--net/ipv4/route.c5
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh346
9 files changed, 690 insertions, 29 deletions
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 3b6664c7e73c..841910f1db65 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -308,7 +308,7 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return PTR_ERR(rt);
skb_tunnel_check_pmtu(skb, &rt->dst,
- BAREUDP_IPV4_HLEN + info->options_len);
+ BAREUDP_IPV4_HLEN + info->options_len, false);
sport = udp_flow_src_port(bareudp->net, skb,
bareudp->sport_min, USHRT_MAX,
@@ -369,7 +369,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(dst))
return PTR_ERR(dst);
- skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len);
+ skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len,
+ false);
sport = udp_flow_src_port(bareudp->net, skb,
bareudp->sport_min, USHRT_MAX,
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 017c13acc911..c71f994fbc73 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -893,8 +893,31 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(rt))
return PTR_ERR(rt);
- skb_tunnel_check_pmtu(skb, &rt->dst,
- GENEVE_IPV4_HLEN + info->options_len);
+ err = skb_tunnel_check_pmtu(skb, &rt->dst,
+ GENEVE_IPV4_HLEN + info->options_len,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ dst_release(&rt->dst);
+ return err;
+ } else if (err) {
+ struct ip_tunnel_info *info;
+
+ info = skb_tunnel_info(skb);
+ if (info) {
+ info->key.u.ipv4.dst = fl4.saddr;
+ info->key.u.ipv4.src = fl4.daddr;
+ }
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dst_release(&rt->dst);
+ return -EINVAL;
+ }
+
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ netif_rx(skb);
+ dst_release(&rt->dst);
+ return -EMSGSIZE;
+ }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->cfg.collect_md) {
@@ -955,7 +978,30 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(dst))
return PTR_ERR(dst);
- skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
+ err = skb_tunnel_check_pmtu(skb, dst,
+ GENEVE_IPV6_HLEN + info->options_len,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ dst_release(dst);
+ return err;
+ } else if (err) {
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (info) {
+ info->key.u.ipv6.dst = fl6.saddr;
+ info->key.u.ipv6.src = fl6.daddr;
+ }
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dst_release(dst);
+ return -EINVAL;
+ }
+
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ netif_rx(skb);
+ dst_release(dst);
+ return -EMSGSIZE;
+ }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->cfg.collect_md) {
@@ -1012,7 +1058,8 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
if (likely(!err))
return NETDEV_TX_OK;
- dev_kfree_skb(skb);
+ if (err != -EMSGSIZE)
+ dev_kfree_skb(skb);
if (err == -ELOOP)
dev->stats.collisions++;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 77658425db8a..6d5816be6131 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2500,7 +2500,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
- struct vxlan_dev *dst_vxlan, __be32 vni)
+ struct vxlan_dev *dst_vxlan, __be32 vni,
+ bool snoop)
{
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
@@ -2532,7 +2533,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
goto drop;
}
- if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
+ if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
u64_stats_update_begin(&tx_stats->syncp);
@@ -2581,7 +2582,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
return -ENOENT;
}
- vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}
@@ -2617,7 +2618,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (vxlan_addr_any(dst)) {
if (did_rsc) {
/* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
+ vxlan_encap_bypass(skb, vxlan, vxlan,
+ default_vni, true);
return;
}
goto drop;
@@ -2720,7 +2722,23 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ndst = &rt->dst;
- skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ goto tx_error;
+ } else if (err) {
+ if (info) {
+ struct in_addr src, dst;
+
+ src = remote_ip.sin.sin_addr;
+ dst = local_ip.sin.sin_addr;
+ info->key.u.ipv4.src = src.s_addr;
+ info->key.u.ipv4.dst = dst.s_addr;
+ }
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+ dst_release(ndst);
+ goto out_unlock;
+ }
tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2760,7 +2778,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto out_unlock;
}
- skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ goto tx_error;
+ } else if (err) {
+ if (info) {
+ struct in6_addr src, dst;
+
+ src = remote_ip.sin6.sin6_addr;
+ dst = local_ip.sin6.sin6_addr;
+ info->key.u.ipv6.src = src;
+ info->key.u.ipv6.dst = dst;
+ }
+
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+ dst_release(ndst);
+ goto out_unlock;
+ }
tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 88d40b9abaa1..90444622b703 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4840,6 +4840,11 @@ static inline bool netif_is_ovs_port(const struct net_device *dev)
return dev->priv_flags & IFF_OVS_DATAPATH;
}
+static inline bool netif_is_any_bridge_port(const struct net_device *dev)
+{
+ return netif_is_bridge_port(dev) || netif_is_ovs_port(dev);
+}
+
static inline bool netif_is_team_master(const struct net_device *dev)
{
return dev->priv_flags & IFF_TEAM;
diff --git a/include/net/dst.h b/include/net/dst.h
index 852d8fb36ab7..6ae2e625050d 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -535,14 +535,4 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
}
-static inline void skb_tunnel_check_pmtu(struct sk_buff *skb,
- struct dst_entry *encap_dst,
- int headroom)
-{
- u32 encap_mtu = dst_mtu(encap_dst);
-
- if (skb->len > encap_mtu - headroom)
- skb_dst_update_pmtu_no_confirm(skb, encap_mtu - headroom);
-}
-
#endif /* _NET_DST_H */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 36025dea7612..02ccd32542d0 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -420,6 +420,8 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb,
u8 tos, u8 ttl, __be16 df, bool xnet);
struct metadata_dst *iptunnel_metadata_reply(struct metadata_dst *md,
gfp_t flags);
+int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
+ int headroom, bool reply);
int iptunnel_handle_offloads(struct sk_buff *skb, int gso_type_mask);
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index f8b419e2475c..9ddee2a0c66d 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -184,6 +184,250 @@ int iptunnel_handle_offloads(struct sk_buff *skb,
}
EXPORT_SYMBOL_GPL(iptunnel_handle_offloads);
+/**
+ * iptunnel_pmtud_build_icmp() - Build ICMP error message for PMTUD
+ * @skb: Original packet with L2 header
+ * @mtu: MTU value for ICMP error
+ *
+ * Return: length on success, negative error code if message couldn't be built.
+ */
+static int iptunnel_pmtud_build_icmp(struct sk_buff *skb, int mtu)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct icmphdr *icmph;
+ struct iphdr *niph;
+ struct ethhdr eh;
+ int len, err;
+
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
+ return -EINVAL;
+
+ skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
+ pskb_pull(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+
+ err = pskb_trim(skb, 576 - sizeof(*niph) - sizeof(*icmph));
+ if (err)
+ return err;
+
+ len = skb->len + sizeof(*icmph);
+ err = skb_cow(skb, sizeof(*niph) + sizeof(*icmph) + ETH_HLEN);
+ if (err)
+ return err;
+
+ icmph = skb_push(skb, sizeof(*icmph));
+ *icmph = (struct icmphdr) {
+ .type = ICMP_DEST_UNREACH,
+ .code = ICMP_FRAG_NEEDED,
+ .checksum = 0,
+ .un.frag.__unused = 0,
+ .un.frag.mtu = ntohs(mtu),
+ };
+ icmph->checksum = ip_compute_csum(icmph, len);
+ skb_reset_transport_header(skb);
+
+ niph = skb_push(skb, sizeof(*niph));
+ *niph = (struct iphdr) {
+ .ihl = sizeof(*niph) / 4u,
+ .version = 4,
+ .tos = 0,
+ .tot_len = htons(len + sizeof(*niph)),
+ .id = 0,
+ .frag_off = htons(IP_DF),
+ .ttl = iph->ttl,
+ .protocol = IPPROTO_ICMP,
+ .saddr = iph->daddr,
+ .daddr = iph->saddr,
+ };
+ ip_send_check(niph);
+ skb_reset_network_header(skb);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
+ skb_reset_mac_header(skb);
+
+ return skb->len;
+}
+
+/**
+ * iptunnel_pmtud_check_icmp() - Trigger ICMP reply if needed and allowed
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
+ * @mtu: Network MTU for path
+ *
+ * Return: 0 for no ICMP reply, length if built, negative value on error.
+ */
+static int iptunnel_pmtud_check_icmp(struct sk_buff *skb, int mtu)
+{
+ const struct icmphdr *icmph = icmp_hdr(skb);
+ const struct iphdr *iph = ip_hdr(skb);
+
+ if (mtu <= 576 || iph->frag_off != htons(IP_DF))
+ return 0;
+
+ if (ipv4_is_lbcast(iph->daddr) || ipv4_is_multicast(iph->daddr) ||
+ ipv4_is_zeronet(iph->saddr) || ipv4_is_loopback(iph->saddr) ||
+ ipv4_is_lbcast(iph->saddr) || ipv4_is_multicast(iph->saddr))
+ return 0;
+
+ if (iph->protocol == IPPROTO_ICMP && icmp_is_err(icmph->type))
+ return 0;
+
+ return iptunnel_pmtud_build_icmp(skb, mtu);
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * iptunnel_pmtud_build_icmpv6() - Build ICMPv6 error message for PMTUD
+ * @skb: Original packet with L2 header
+ * @mtu: MTU value for ICMPv6 error
+ *
+ * Return: length on success, negative error code if message couldn't be built.
+ */
+static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ struct icmp6hdr *icmp6h;
+ struct ipv6hdr *nip6h;
+ struct ethhdr eh;
+ int len, err;
+ __wsum csum;
+
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
+ return -EINVAL;
+
+ skb_copy_bits(skb, skb_mac_offset(skb), &eh, ETH_HLEN);
+ pskb_pull(skb, ETH_HLEN);
+ skb_reset_network_header(skb);
+
+ err = pskb_trim(skb, IPV6_MIN_MTU - sizeof(*nip6h) - sizeof(*icmp6h));
+ if (err)
+ return err;
+
+ len = skb->len + sizeof(*icmp6h);
+ err = skb_cow(skb, sizeof(*nip6h) + sizeof(*icmp6h) + ETH_HLEN);
+ if (err)
+ return err;
+
+ icmp6h = skb_push(skb, sizeof(*icmp6h));
+ *icmp6h = (struct icmp6hdr) {
+ .icmp6_type = ICMPV6_PKT_TOOBIG,
+ .icmp6_code = 0,
+ .icmp6_cksum = 0,
+ .icmp6_mtu = htonl(mtu),
+ };
+ skb_reset_transport_header(skb);
+
+ nip6h = skb_push(skb, sizeof(*nip6h));
+ *nip6h = (struct ipv6hdr) {
+ .priority = 0,
+ .version = 6,
+ .flow_lbl = { 0 },
+ .payload_len = htons(len),
+ .nexthdr = IPPROTO_ICMPV6,
+ .hop_limit = ip6h->hop_limit,
+ .saddr = ip6h->daddr,
+ .daddr = ip6h->saddr,
+ };
+ skb_reset_network_header(skb);
+
+ csum = csum_partial(icmp6h, len, 0);
+ icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
+ IPPROTO_ICMPV6, csum);
+
+ skb->ip_summed = CHECKSUM_NONE;
+
+ eth_header(skb, skb->dev, htons(eh.h_proto), eh.h_source, eh.h_dest, 0);
+ skb_reset_mac_header(skb);
+
+ return skb->len;
+}
+
+/**
+ * iptunnel_pmtud_check_icmpv6() - Trigger ICMPv6 reply if needed and allowed
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
+ * @mtu: Network MTU for path
+ *
+ * Return: 0 for no ICMPv6 reply, length if built, negative value on error.
+ */
+static int iptunnel_pmtud_check_icmpv6(struct sk_buff *skb, int mtu)
+{
+ const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+ int stype = ipv6_addr_type(&ip6h->saddr);
+ u8 proto = ip6h->nexthdr;
+ __be16 frag_off;
+ int offset;
+
+ if (mtu <= IPV6_MIN_MTU)
+ return 0;
+
+ if (stype == IPV6_ADDR_ANY || stype == IPV6_ADDR_MULTICAST ||
+ stype == IPV6_ADDR_LOOPBACK)
+ return 0;
+
+ offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &proto,
+ &frag_off);
+ if (offset < 0 || (frag_off & htons(~0x7)))
+ return 0;
+
+ if (proto == IPPROTO_ICMPV6) {
+ struct icmp6hdr *icmp6h;
+
+ if (!pskb_may_pull(skb, skb_network_header(skb) +
+ offset + 1 - skb->data))
+ return 0;
+
+ icmp6h = (struct icmp6hdr *)(skb_network_header(skb) + offset);
+ if (icmpv6_is_err(icmp6h->icmp6_type) ||
+ icmp6h->icmp6_type == NDISC_REDIRECT)
+ return 0;
+ }
+
+ return iptunnel_pmtud_build_icmpv6(skb, mtu);
+}
+#endif /* IS_ENABLED(CONFIG_IPV6) */
+
+/**
+ * skb_tunnel_check_pmtu() - Check, update PMTU and trigger ICMP reply as needed
+ * @skb: Buffer being sent by encapsulation, L2 headers expected
+ * @encap_dst: Destination for tunnel encapsulation (outer IP)
+ * @headroom: Encapsulation header size, bytes
+ * @reply: Build matching ICMP or ICMPv6 message as a result
+ *
+ * L2 tunnel implementations that can carry IP and can be directly bridged
+ * (currently UDP tunnels) can't always rely on IP forwarding paths to handle
+ * PMTU discovery. In the bridged case, ICMP or ICMPv6 messages need to be built
+ * based on payload and sent back by the encapsulation itself.
+ *
+ * For routable interfaces, we just need to update the PMTU for the destination.
+ *
+ * Return: 0 if ICMP error not needed, length if built, negative value on error
+ */
+int skb_tunnel_check_pmtu(struct sk_buff *skb, struct dst_entry *encap_dst,
+ int headroom, bool reply)
+{
+ u32 mtu = dst_mtu(encap_dst) - headroom;
+
+ if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
+ (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
+ return 0;
+
+ skb_dst_update_pmtu_no_confirm(skb, mtu);
+
+ if (!reply || skb->pkt_type == PACKET_HOST)
+ return 0;
+
+ if (skb->protocol == htons(ETH_P_IP))
+ return iptunnel_pmtud_check_icmp(skb, mtu);
+
+#if IS_ENABLED(CONFIG_IPV6)
+ if (skb->protocol == htons(ETH_P_IPV6))
+ return iptunnel_pmtud_check_icmpv6(skb, mtu);
+#endif
+ return 0;
+}
+EXPORT_SYMBOL(skb_tunnel_check_pmtu);
+
/* Often modified stats are per cpu, other are shared (netdev->stats) */
void ip_tunnel_get_stats64(struct net_device *dev,
struct rtnl_link_stats64 *tot)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a01efa062f6b..8ca6bcab7b03 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1050,6 +1050,11 @@ static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,
struct flowi4 fl4;
ip_rt_build_flow_key(&fl4, sk, skb);
+
+ /* Don't make lookup fail for bridged encapsulations */
+ if (skb && netif_is_any_bridge_port(skb->dev))
+ fl4.flowi4_oif = 0;
+
__ip_rt_update_pmtu(rt, &fl4, mtu);
}
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 77c09cd339c3..6bbf69a28e12 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -59,6 +59,45 @@
# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
# VXLAN
#
+# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
+# Set up three namespaces, A, B, and C, with routing between A and B over
+# R1. R2 is unused in these tests. A has a veth connection to C, and is
+# connected to B via a VXLAN endpoint, which is directly bridged to C.
+# MTU on the B-R1 link is lower than other MTUs.
+#
+# Check that both C and A are able to communicate with B over the VXLAN
+# tunnel, and that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth A B else: 5000
+# ' bridge |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_br_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
+# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
+# Set up two namespaces, B, and C, with routing between the init namespace
+# and B over R1. A and R2 are unused in these tests. The init namespace
+# has a veth connection to C, and is connected to B via a VXLAN endpoint,
+# which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
+# is lower than other MTUs.
+#
+# Check that C is able to communicate with B over the VXLAN tunnel, and
+# that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth init B else: 5000
+# '- ovs |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
# - pmtu_ipv{4,6}_fou{4,6}_exception
# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
# (FoU) over IPv4/IPv6, instead of VXLAN
@@ -147,6 +186,22 @@ tests="
pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1
pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
@@ -173,10 +228,12 @@ tests="
NS_A="ns-A"
NS_B="ns-B"
+NS_C="ns-C"
NS_R1="ns-R1"
NS_R2="ns-R2"
ns_a="ip netns exec ${NS_A}"
ns_b="ip netns exec ${NS_B}"
+ns_c="ip netns exec ${NS_C}"
ns_r1="ip netns exec ${NS_R1}"
ns_r2="ip netns exec ${NS_R2}"
@@ -239,9 +296,11 @@ routes_nh="
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
+veth4_c_addr="192.168.2.10"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
+veth6_c_addr="fd00:2::c"
veth6_mask="64"
tunnel4_a_addr="192.168.2.1"
@@ -428,7 +487,7 @@ setup_ip6ip6() {
}
setup_namespaces() {
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns add ${n} || return 1
# Disable DAD, so that we don't have to wait to use the
@@ -484,6 +543,7 @@ setup_vxlan_or_geneve() {
a_addr="${2}"
b_addr="${3}"
opts="${4}"
+ br_if_a="${5}"
if [ "${type}" = "vxlan" ]; then
opts="${opts} ttl 64 dstport 4789"
@@ -497,10 +557,16 @@ setup_vxlan_or_geneve() {
run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
- run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
- run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ if [ -n "${br_if_a}" ]; then
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
+ else
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ fi
- run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
run_cmd ${ns_a} ip link set ${type}_a up
@@ -516,11 +582,27 @@ setup_vxlan4() {
}
setup_geneve6() {
- setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
}
setup_vxlan6() {
- setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
+}
+
+setup_bridged_geneve4() {
+ setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+setup_bridged_vxlan4() {
+ setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+setup_bridged_geneve6() {
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
+}
+
+setup_bridged_vxlan6() {
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
}
setup_xfrm() {
@@ -630,6 +712,80 @@ setup_routing() {
return 0
}
+setup_bridge() {
+ run_cmd ${ns_a} ip link add br0 type bridge || return 2
+ run_cmd ${ns_a} ip link set br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns ns-A
+
+ run_cmd ${ns_a} ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ${ns_a} ip link set veth_A-C master br0
+}
+
+setup_ovs_vxlan_or_geneve() {
+ type="${1}"
+ a_addr="${2}"
+ b_addr="${3}"
+
+ if [ "${type}" = "vxlan" ]; then
+ opts="${opts} ttl 64 dstport 4789"
+ opts_b="local ${b_addr}"
+ fi
+
+ run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
+ set interface ${type}_a type=${type} \
+ options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+
+ run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
+
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+
+ run_cmd ${ns_b} ip link set ${type}_b up
+}
+
+setup_ovs_geneve4() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_vxlan4() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_geneve6() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_vxlan6() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_bridge() {
+ run_cmd ovs-vsctl add-br ovs_br0 || return 2
+ run_cmd ip link set ovs_br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns 1
+
+ run_cmd ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
+
+ # Move veth_A-R1 to init
+ run_cmd ${ns_a} ip link set veth_A-R1 netns 1
+ run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
+ run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
+ run_cmd ip link set veth_A-R1 up
+ run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
+ run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
@@ -657,9 +813,14 @@ cleanup() {
done
tcpdump_pids=
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
ip netns del ${n} 2> /dev/null
done
+
+ ip link del veth_A-C 2>/dev/null
+ ip link del veth_A-R1 2>/dev/null
+ ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
+ ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
}
mtu() {
@@ -892,6 +1053,177 @@ test_pmtu_ipv6_geneve6_exception() {
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
+test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing bridge bridged_${type}4 || return 2
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing bridge bridged_${type}6 || return 2
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
+ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "${ns_a}" br0 "${ns_a}" veth-A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" br0 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+}
+
+test_pmtu_ipv4_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
+}
+
+test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing ovs_bridge ovs_${type}4 || return 2
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing ovs_bridge ovs_${type}6 || return 2
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ if [ "${type}" = "vxlan" ]; then
+ tun_a="vxlan_sys_4789"
+ elif [ "${type}" = "geneve" ]; then
+ tun_a="genev_sys_6081"
+ fi
+
+ trace "" "${tun_a}" "${ns_b}" ${type}_b \
+ "" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "" ovs_br0 "" veth-A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "" ovs_br0 $((${ll_mtu} + 1000))
+ mtu "" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "" ${tun_a} $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
+}
+
+test_pmtu_ipv4_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
+}
+
test_pmtu_ipvX_over_fouY_or_gueY() {
inner_family=${1}
outer_family=${2}