summaryrefslogtreecommitdiff
path: root/net/ipv6/seg6_iptunnel.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6/seg6_iptunnel.c')
-rw-r--r--net/ipv6/seg6_iptunnel.c271
1 files changed, 222 insertions, 49 deletions
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index d64855010948..3e1b9991131a 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -36,9 +36,11 @@ static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
case SEG6_IPTUN_MODE_INLINE:
break;
case SEG6_IPTUN_MODE_ENCAP:
+ case SEG6_IPTUN_MODE_ENCAP_RED:
head = sizeof(struct ipv6hdr);
break;
case SEG6_IPTUN_MODE_L2ENCAP:
+ case SEG6_IPTUN_MODE_L2ENCAP_RED:
return 0;
}
@@ -122,11 +124,12 @@ static __be32 seg6_make_flowlabel(struct net *net, struct sk_buff *skb,
return flowlabel;
}
-/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+static int __seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
- struct net *net = dev_net(dst->dev);
+ struct net_device *dev = dst_dev(dst);
+ struct net *net = dev_net(dev);
struct ipv6hdr *hdr, *inner_hdr;
struct ipv6_sr_hdr *isrh;
int hdrlen, tot_len, err;
@@ -135,7 +138,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
hdrlen = (osrh->hdrlen + 1) << 3;
tot_len = hdrlen + sizeof(*hdr);
- err = skb_cow_head(skb, tot_len + skb->mac_len);
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -179,7 +182,7 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
- set_tun_src(net, dst->dev, &hdr->daddr, &hdr->saddr);
+ set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
@@ -189,14 +192,142 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, tot_len);
return 0;
}
+
+/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
+{
+ return __seg6_do_srh_encap(skb, osrh, proto, NULL);
+}
EXPORT_SYMBOL_GPL(seg6_do_srh_encap);
-/* insert an SRH within an IPv6 packet, just after the IPv6 header */
-int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+/* encapsulate an IPv6 packet within an outer IPv6 header with reduced SRH */
+static int seg6_do_srh_encap_red(struct sk_buff *skb,
+ struct ipv6_sr_hdr *osrh, int proto,
+ struct dst_entry *cache_dst)
+{
+ __u8 first_seg = osrh->first_segment;
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst_dev(dst);
+ struct net *net = dev_net(dev);
+ struct ipv6hdr *hdr, *inner_hdr;
+ int hdrlen = ipv6_optlen(osrh);
+ int red_tlv_offset, tlv_offset;
+ struct ipv6_sr_hdr *isrh;
+ bool skip_srh = false;
+ __be32 flowlabel;
+ int tot_len, err;
+ int red_hdrlen;
+ int tlvs_len;
+
+ if (first_seg > 0) {
+ red_hdrlen = hdrlen - sizeof(struct in6_addr);
+ } else {
+ /* NOTE: if tag/flags and/or other TLVs are introduced in the
+ * seg6_iptunnel infrastructure, they should be considered when
+ * deciding to skip the SRH.
+ */
+ skip_srh = !sr_has_hmac(osrh);
+
+ red_hdrlen = skip_srh ? 0 : hdrlen;
+ }
+
+ tot_len = red_hdrlen + sizeof(struct ipv6hdr);
+
+ err = skb_cow_head(skb, tot_len + dst_dev_overhead(cache_dst, skb));
+ if (unlikely(err))
+ return err;
+
+ inner_hdr = ipv6_hdr(skb);
+ flowlabel = seg6_make_flowlabel(net, skb, inner_hdr);
+
+ skb_push(skb, tot_len);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ hdr = ipv6_hdr(skb);
+
+ /* based on seg6_do_srh_encap() */
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+ flowlabel);
+ hdr->hop_limit = inner_hdr->hop_limit;
+ } else {
+ ip6_flow_hdr(hdr, 0, flowlabel);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+
+ memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+ IP6CB(skb)->iif = skb->skb_iif;
+ }
+
+ /* no matter if we have to skip the SRH or not, the first segment
+ * always comes in the pushed IPv6 header.
+ */
+ hdr->daddr = osrh->segments[first_seg];
+
+ if (skip_srh) {
+ hdr->nexthdr = proto;
+
+ set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
+ goto out;
+ }
+
+ /* we cannot skip the SRH, slow path */
+
+ hdr->nexthdr = NEXTHDR_ROUTING;
+ isrh = (void *)hdr + sizeof(struct ipv6hdr);
+
+ if (unlikely(!first_seg)) {
+ /* this is a very rare case; we have only one SID but
+ * we cannot skip the SRH since we are carrying some
+ * other info.
+ */
+ memcpy(isrh, osrh, hdrlen);
+ goto srcaddr;
+ }
+
+ tlv_offset = sizeof(*osrh) + (first_seg + 1) * sizeof(struct in6_addr);
+ red_tlv_offset = tlv_offset - sizeof(struct in6_addr);
+
+ memcpy(isrh, osrh, red_tlv_offset);
+
+ tlvs_len = hdrlen - tlv_offset;
+ if (unlikely(tlvs_len > 0)) {
+ const void *s = (const void *)osrh + tlv_offset;
+ void *d = (void *)isrh + red_tlv_offset;
+
+ memcpy(d, s, tlvs_len);
+ }
+
+ --isrh->first_segment;
+ isrh->hdrlen -= 2;
+
+srcaddr:
+ isrh->nexthdr = proto;
+ set_tun_src(net, dev, &hdr->daddr, &hdr->saddr);
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (unlikely(!skip_srh && sr_has_hmac(isrh))) {
+ err = seg6_push_hmac(net, &hdr->saddr, isrh);
+ if (unlikely(err))
+ return err;
+ }
+#endif
+
+out:
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+ skb_postpush_rcsum(skb, hdr, tot_len);
+
+ return 0;
+}
+
+static int __seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ struct dst_entry *cache_dst)
{
struct ipv6hdr *hdr, *oldhdr;
struct ipv6_sr_hdr *isrh;
@@ -204,7 +335,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
hdrlen = (osrh->hdrlen + 1) << 3;
- err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ err = skb_cow_head(skb, hdrlen + dst_dev_overhead(cache_dst, skb));
if (unlikely(err))
return err;
@@ -233,7 +364,7 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
#ifdef CONFIG_IPV6_SEG6_HMAC
if (sr_has_hmac(isrh)) {
- struct net *net = dev_net(skb_dst(skb)->dev);
+ struct net *net = skb_dst_dev_net(skb);
err = seg6_push_hmac(net, &hdr->saddr, isrh);
if (unlikely(err))
@@ -241,13 +372,14 @@ int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
return 0;
}
-EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
-static int seg6_do_srh(struct sk_buff *skb)
+static int seg6_do_srh(struct sk_buff *skb, struct dst_entry *cache_dst)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
@@ -260,11 +392,12 @@ static int seg6_do_srh(struct sk_buff *skb)
if (skb->protocol != htons(ETH_P_IPV6))
return -EINVAL;
- err = seg6_do_srh_inline(skb, tinfo->srh);
+ err = __seg6_do_srh_inline(skb, tinfo->srh, cache_dst);
if (err)
return err;
break;
case SEG6_IPTUN_MODE_ENCAP:
+ case SEG6_IPTUN_MODE_ENCAP_RED:
err = iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6);
if (err)
return err;
@@ -276,7 +409,13 @@ static int seg6_do_srh(struct sk_buff *skb)
else
return -EINVAL;
- err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ if (tinfo->mode == SEG6_IPTUN_MODE_ENCAP)
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ proto, cache_dst);
+ else
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ proto, cache_dst);
+
if (err)
return err;
@@ -285,6 +424,7 @@ static int seg6_do_srh(struct sk_buff *skb)
skb->protocol = htons(ETH_P_IPV6);
break;
case SEG6_IPTUN_MODE_L2ENCAP:
+ case SEG6_IPTUN_MODE_L2ENCAP_RED:
if (!skb_mac_header_was_set(skb))
return -EINVAL;
@@ -294,7 +434,15 @@ static int seg6_do_srh(struct sk_buff *skb)
skb_mac_header_rebuild(skb);
skb_push(skb, skb->mac_len);
- err = seg6_do_srh_encap(skb, tinfo->srh, IPPROTO_ETHERNET);
+ if (tinfo->mode == SEG6_IPTUN_MODE_L2ENCAP)
+ err = __seg6_do_srh_encap(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
+ else
+ err = seg6_do_srh_encap_red(skb, tinfo->srh,
+ IPPROTO_ETHERNET,
+ cache_dst);
+
if (err)
return err;
@@ -302,13 +450,19 @@ static int seg6_do_srh(struct sk_buff *skb)
break;
}
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
return 0;
}
+/* insert an SRH within an IPv6 packet, just after the IPv6 header */
+int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+{
+ return __seg6_do_srh_inline(skb, osrh, NULL);
+}
+EXPORT_SYMBOL_GPL(seg6_do_srh_inline);
+
static int seg6_input_finish(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
@@ -320,51 +474,63 @@ static int seg6_input_core(struct net *net, struct sock *sk,
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
+ struct lwtunnel_state *lwtst;
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
+ /* We cannot dereference "orig_dst" once ip6_route_input() or
+ * skb_dst_drop() is called. However, in order to detect a dst loop, we
+ * need the address of its lwtstate. So, save the address of lwtstate
+ * now and use it later as a comparison.
+ */
+ lwtst = orig_dst->lwtstate;
- slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
+ slwt = seg6_lwt_lwtunnel(lwtst);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
+ local_bh_enable();
- skb_dst_drop(skb);
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err)) {
+ dst_release(dst);
+ goto drop;
+ }
if (!dst) {
ip6_route_input(skb);
dst = skb_dst(skb);
- if (!dst->error) {
- preempt_disable();
+
+ /* cache only if we don't create a dst reference loop */
+ if (!dst->error && lwtst != dst->lwtstate) {
+ local_bh_disable();
dst_cache_set_ip6(&slwt->cache, dst,
&ipv6_hdr(skb)->saddr);
- preempt_enable();
+ local_bh_enable();
}
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
+ if (unlikely(err))
+ goto drop;
} else {
+ skb_dst_drop(skb);
skb_dst_set(skb, dst);
}
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- return err;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
dev_net(skb->dev), NULL, skb, NULL,
- skb_dst(skb)->dev, seg6_input_finish);
+ skb_dst_dev(skb), seg6_input_finish);
return seg6_input_finish(dev_net(skb->dev), NULL, skb);
+drop:
+ kfree_skb(skb);
+ return err;
}
static int seg6_input_nf(struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst_dev(skb);
struct net *net = dev_net(skb->dev);
switch (skb->protocol) {
@@ -395,15 +561,15 @@ static int seg6_output_core(struct net *net, struct sock *sk,
struct seg6_lwt *slwt;
int err;
- err = seg6_do_srh(skb);
- if (unlikely(err))
- goto drop;
-
slwt = seg6_lwt_lwtunnel(orig_dst->lwtstate);
- preempt_disable();
+ local_bh_disable();
dst = dst_cache_get(&slwt->cache);
- preempt_enable();
+ local_bh_enable();
+
+ err = seg6_do_srh(skb, dst);
+ if (unlikely(err))
+ goto drop;
if (unlikely(!dst)) {
struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -419,35 +585,38 @@ static int seg6_output_core(struct net *net, struct sock *sk,
dst = ip6_route_output(net, NULL, &fl6);
if (dst->error) {
err = dst->error;
- dst_release(dst);
goto drop;
}
- preempt_disable();
- dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
- preempt_enable();
+ /* cache only if we don't create a dst reference loop */
+ if (orig_dst->lwtstate != dst->lwtstate) {
+ local_bh_disable();
+ dst_cache_set_ip6(&slwt->cache, dst, &fl6.saddr);
+ local_bh_enable();
+ }
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(dst_dev(dst)));
+ if (unlikely(err))
+ goto drop;
}
skb_dst_drop(skb);
skb_dst_set(skb, dst);
- err = skb_cow_head(skb, LL_RESERVED_SPACE(dst->dev));
- if (unlikely(err))
- goto drop;
-
if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
- NULL, skb_dst(skb)->dev, dst_output);
+ NULL, dst_dev(dst), dst_output);
return dst_output(net, sk, skb);
drop:
+ dst_release(dst);
kfree_skb(skb);
return err;
}
static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
{
- struct net_device *dev = skb_dst(skb)->dev;
+ struct net_device *dev = skb_dst_dev(skb);
switch (skb->protocol) {
case htons(ETH_P_IP):
@@ -514,6 +683,10 @@ static int seg6_build_state(struct net *net, struct nlattr *nla,
break;
case SEG6_IPTUN_MODE_L2ENCAP:
break;
+ case SEG6_IPTUN_MODE_ENCAP_RED:
+ break;
+ case SEG6_IPTUN_MODE_L2ENCAP_RED:
+ break;
default:
return -EINVAL;
}