From b439475a0dba2b50fb39a6356069969780e45fd4 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 4 Oct 2023 15:05:10 +0200 Subject: xfrm: Use the XFRM_GRO to indicate a GRO call on input This is needed to support GRO for ESP in UDP encapsulation. Signed-off-by: Steffen Klassert Co-developed-by: Antony Antony Signed-off-by: Antony Antony Reviewed-by: Eyal Birger --- net/ipv4/esp4_offload.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 10e96ed6c9e3..5b487d12d0cf 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -77,7 +77,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, -2); + xfrm_input(skb, IPPROTO_ESP, spi, 0); return ERR_PTR(-EINPROGRESS); out_reset: -- cgit From 172bf009c18d3ba4b841408f864a5322989ab6c0 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 4 Oct 2023 15:05:27 +0200 Subject: xfrm: Support GRO for IPv4 ESP in UDP encapsulation This patch enables the GRO codepath for IPv4 ESP in UDP encapsulated packets. Decapsulation happens at L2 and saves a full round through the stack for each packet. This is also needed to support HW offload for ESP in UDP encapsulation. Enabling this would imporove performance for ESP in UDP datapath, i.e IPsec with NAT in between. By default GRP for ESP-in-UDP is disabled for UDP sockets. To enable this feature for an ESP socket, the following two options need to be set: 1. enable ESP-in-UDP: (this is already set by an IKE daemon). int type = UDP_ENCAP_ESPINUDP; setsockopt(fd, SOL_UDP, UDP_ENCAP, &type, sizeof(type)); 2. To enable GRO for ESP in UDP socket: type = true; setsockopt(fd, SOL_UDP, UDP_GRO, &type, sizeof(type)); Enabling ESP-in-UDP has the side effect of preventing the Linux stack from seeing ESP packets at the L3 (when ESP OFFLOAD is disabled), as packets are immediately decapsulated from UDP and decrypted. This change may affect nftable rules that match on ESP packets at L3. Also tcpdump won't see the ESP packet. Developers/admins are advised to review and adapt any nftable rules accordingly before enabling this feature to prevent potential rule breakage. Also tcpdump will not see from ESP packets from a ESP in UDP flow, when this is enabled. Signed-off-by: Steffen Klassert Co-developed-by: Antony Antony Signed-off-by: Antony Antony Reviewed-by: Eyal Birger --- net/ipv4/esp4_offload.c | 6 +++- net/ipv4/udp.c | 14 ++++++++ net/ipv4/xfrm4_input.c | 94 +++++++++++++++++++++++++++++++++++++++---------- 3 files changed, 95 insertions(+), 19 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index 5b487d12d0cf..b3271957ad9a 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -33,6 +33,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, int offset = skb_gro_offset(skb); struct xfrm_offload *xo; struct xfrm_state *x; + int encap_type = 0; __be32 seq; __be32 spi; @@ -70,6 +71,9 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, xo->flags |= XFRM_GRO; + if (NAPI_GRO_CB(skb)->proto == IPPROTO_UDP) + encap_type = UDP_ENCAP_ESPINUDP; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL; XFRM_SPI_SKB_CB(skb)->family = AF_INET; XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); @@ -77,7 +81,7 @@ static struct sk_buff *esp4_gro_receive(struct list_head *head, /* We don't need to handle errors from xfrm_input, it does all * the error handling and frees the resources on error. */ - xfrm_input(skb, IPPROTO_ESP, spi, 0); + xfrm_input(skb, IPPROTO_ESP, spi, encap_type); return ERR_PTR(-EINPROGRESS); out_reset: diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c3ff984b6354..b8d7c5e86d0d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2625,6 +2625,17 @@ void udp_destroy_sock(struct sock *sk) } } +static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, + struct sock *sk) +{ +#ifdef CONFIG_XFRM + if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { + if (family == AF_INET) + WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv); + } +#endif +} + /* * Socket option code for UDP */ @@ -2674,6 +2685,8 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, case 0: #ifdef CONFIG_XFRM case UDP_ENCAP_ESPINUDP: + set_xfrm_gro_udp_encap_rcv(val, sk->sk_family, sk); + fallthrough; case UDP_ENCAP_ESPINUDP_NON_IKE: #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) @@ -2716,6 +2729,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, udp_tunnel_encap_enable(sk); udp_assign_bit(GRO_ENABLED, sk, valbool); udp_assign_bit(ACCEPT_L4, sk, valbool); + set_xfrm_gro_udp_encap_rcv(up->encap_type, sk->sk_family, sk); break; /* diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 183f6dc37242..42879c5e026a 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk, struct sk_buff *skb) @@ -72,14 +74,7 @@ int xfrm4_transport_finish(struct sk_buff *skb, int async) return 0; } -/* If it's a keepalive packet, then just eat it. - * If it's an encapsulated packet, then pass it to the - * IPsec xfrm input. - * Returns 0 if skb passed to xfrm or was dropped. - * Returns >0 if skb should be passed to UDP. - * Returns <0 if skb should be resubmitted (-ret is protocol) - */ -int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull) { struct udp_sock *up = udp_sk(sk); struct udphdr *uh; @@ -110,7 +105,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) { /* ESP Packet without Non-ESP header */ len = sizeof(struct udphdr); @@ -121,7 +116,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) case UDP_ENCAP_ESPINUDP_NON_IKE: /* Check if this is a keepalive packet. If so, eat it. */ if (len == 1 && udpdata[0] == 0xff) { - goto drop; + return -EINVAL; } else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) && udpdata32[0] == 0 && udpdata32[1] == 0) { @@ -139,7 +134,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) * protocol to ESP, and then call into the transform receiver. */ if (skb_unclone(skb, GFP_ATOMIC)) - goto drop; + return -EINVAL; /* Now we can update and verify the packet length... */ iph = ip_hdr(skb); @@ -147,25 +142,88 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) iph->tot_len = htons(ntohs(iph->tot_len) - len); if (skb->len < iphlen + len) { /* packet is too small!?! */ - goto drop; + return -EINVAL; } /* pull the data buffer up to the ESP header and set the * transport header to point to ESP. Keep UDP on the stack * for later. */ - __skb_pull(skb, len); - skb_reset_transport_header(skb); + if (pull) { + __skb_pull(skb, len); + skb_reset_transport_header(skb); + } else { + skb_set_transport_header(skb, len); + } /* process ESP */ - return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, encap_type); - -drop: - kfree_skb(skb); return 0; } EXPORT_SYMBOL(xfrm4_udp_encap_rcv); +/* If it's a keepalive packet, then just eat it. + * If it's an encapsulated packet, then pass it to the + * IPsec xfrm input. + * Returns 0 if skb passed to xfrm or was dropped. + * Returns >0 if skb should be passed to UDP. + * Returns <0 if skb should be resubmitted (-ret is protocol) + */ +int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) +{ + int ret; + + ret = __xfrm4_udp_encap_rcv(sk, skb, true); + if (!ret) + return xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, + udp_sk(sk)->encap_type); + + if (ret < 0) { + kfree_skb(skb); + return 0; + } + + return ret; +} + +struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, + struct sk_buff *skb) +{ + int offset = skb_gro_offset(skb); + const struct net_offload *ops; + struct sk_buff *pp = NULL; + int ret; + + offset = offset - sizeof(struct udphdr); + + if (!pskb_pull(skb, offset)) + return NULL; + + rcu_read_lock(); + ops = rcu_dereference(inet_offloads[IPPROTO_ESP]); + if (!ops || !ops->callbacks.gro_receive) + goto out; + + ret = __xfrm4_udp_encap_rcv(sk, skb, false); + if (ret) + goto out; + + skb_push(skb, offset); + NAPI_GRO_CB(skb)->proto = IPPROTO_UDP; + + pp = call_gro_receive(ops->callbacks.gro_receive, head, skb); + rcu_read_unlock(); + + return pp; + +out: + rcu_read_unlock(); + skb_push(skb, offset); + NAPI_GRO_CB(skb)->same_flow = 0; + NAPI_GRO_CB(skb)->flush = 1; + + return NULL; +} + int xfrm4_rcv(struct sk_buff *skb) { return xfrm4_rcv_spi(skb, ip_hdr(skb)->protocol, 0); -- cgit From 221ddb723d90907019838b89b9e9bccb11461381 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Wed, 4 Oct 2023 15:05:44 +0200 Subject: xfrm: Support GRO for IPv6 ESP in UDP encapsulation This patch enables the GRO codepath for IPv6 ESP in UDP encapsulated packets. Decapsulation happens at L2 and saves a full round through the stack for each packet. This is also needed to support HW offload for ESP in UDP encapsulation. Signed-off-by: Steffen Klassert Co-developed-by: Antony Antony Signed-off-by: Antony Antony Reviewed-by: Eyal Birger --- net/ipv4/udp.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv4') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b8d7c5e86d0d..7fdc250e0679 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2632,6 +2632,8 @@ static void set_xfrm_gro_udp_encap_rcv(__u16 encap_type, unsigned short family, if (udp_test_bit(GRO_ENABLED, sk) && encap_type == UDP_ENCAP_ESPINUDP) { if (family == AF_INET) WRITE_ONCE(udp_sk(sk)->gro_receive, xfrm4_gro_udp_encap_rcv); + else if (IS_ENABLED(CONFIG_IPV6) && family == AF_INET6) + WRITE_ONCE(udp_sk(sk)->gro_receive, ipv6_stub->xfrm6_gro_udp_encap_rcv); } #endif } -- cgit From 2b1dc6285c3f6e6fcc9e25bf8cd0ca66f2443697 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 4 Oct 2023 18:09:51 +0200 Subject: xfrm: pass struct net to xfrm_decode_session wrappers Preparation patch, extra arg is not used. No functional changes intended. This is needed to replace the xfrm session decode functions with the flow dissector. skb_flow_dissect() cannot be used as-is, because it attempts to deduce the 'struct net' to use for bpf program fetch from skb->sk or skb->dev, but xfrm code path can see skbs that have neither sk or dev filled in. So either flow dissector needs to try harder, e.g. by also trying skb->dst->dev, or we have to pass the struct net explicitly. Passing the struct net doesn't look too bad to me, most places already have it available or can derive it from the output device. Reported-by: kernel test robot Link: https://lore.kernel.org/netdev/202309271628.27fd2187-oliver.sang@intel.com/ Signed-off-by: Florian Westphal Reviewed-by: Simon Horman Signed-off-by: Steffen Klassert --- net/ipv4/icmp.c | 2 +- net/ipv4/ip_vti.c | 4 ++-- net/ipv4/netfilter.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b8607763d113..e63a3bf99617 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -517,7 +517,7 @@ static struct rtable *icmp_route_lookup(struct net *net, } else return rt; - err = xfrm_decode_session_reverse(skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); + err = xfrm_decode_session_reverse(net, skb_in, flowi4_to_flowi(&fl4_dec), AF_INET); if (err) goto relookup_failed; diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index d1e7d0ceb7ed..9ab9b3ebe0cd 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -288,11 +288,11 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) switch (skb->protocol) { case htons(ETH_P_IP): memset(IPCB(skb), 0, sizeof(*IPCB(skb))); - xfrm_decode_session(skb, &fl, AF_INET); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET); break; case htons(ETH_P_IPV6): memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); - xfrm_decode_session(skb, &fl, AF_INET6); + xfrm_decode_session(dev_net(dev), skb, &fl, AF_INET6); break; default: goto tx_err; diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c index bd135165482a..591a2737808e 100644 --- a/net/ipv4/netfilter.c +++ b/net/ipv4/netfilter.c @@ -62,7 +62,7 @@ int ip_route_me_harder(struct net *net, struct sock *sk, struct sk_buff *skb, un #ifdef CONFIG_XFRM if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && - xfrm_decode_session(skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { + xfrm_decode_session(net, skb, flowi4_to_flowi(&fl4), AF_INET) == 0) { struct dst_entry *dst = skb_dst(skb); skb_dst_set(skb, NULL); dst = xfrm_lookup(net, dst, flowi4_to_flowi(&fl4), sk, 0); -- cgit From 53a5b4f2ea858482a427878bca988a7fa9b2ebdc Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 20 Oct 2023 10:05:55 +0200 Subject: xfrm Fix use after free in __xfrm6_udp_encap_rcv. A recent patch changed xfrm6_udp_encap_rcv to not free the skb itself anymore but fogot the case where xfrm4_udp_encap_rcv is called subsequently. Fix this by moving the call to xfrm4_udp_encap_rcv from __xfrm6_udp_encap_rcv to xfrm6_udp_encap_rcv. Fixes: 221ddb723d90 ("xfrm: Support GRO for IPv6 ESP in UDP encapsulation") Signed-off-by: Steffen Klassert --- net/ipv4/xfrm4_input.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 42879c5e026a..c54676998eb6 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -159,7 +159,6 @@ static int __xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb, bool pull /* process ESP */ return 0; } -EXPORT_SYMBOL(xfrm4_udp_encap_rcv); /* If it's a keepalive packet, then just eat it. * If it's an encapsulated packet, then pass it to the @@ -184,6 +183,7 @@ int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb) return ret; } +EXPORT_SYMBOL(xfrm4_udp_encap_rcv); struct sk_buff *xfrm4_gro_udp_encap_rcv(struct sock *sk, struct list_head *head, struct sk_buff *skb) @@ -223,6 +223,7 @@ out: return NULL; } +EXPORT_SYMBOL(xfrm4_gro_udp_encap_rcv); int xfrm4_rcv(struct sk_buff *skb) { -- cgit