summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Ziyang Xuan <william.xuanziyang@huawei.com> 2023-01-13 17:24:51 +0800
committer: Martin KaFai Lau <martin.lau@kernel.org> 2023-01-15 12:56:17 -0800
commit: d219df60a70ed0739aa5dd34b477763311fc5a7b (patch)
tree: d8f5455628830a487861b88051f3811e01e405a9
parent: 1c48391bc6739f5e3306919d4b887b92c35d5490 (diff)
bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()
Add ipip6 and ip6ip decap support for bpf_skb_adjust_room(). Main use case is for using cls_bpf on ingress hook to decapsulate IPv4 over IPv6 and IPv6 over IPv4 tunnel packets.

Add two new flags BPF_F_ADJ_ROOM_DECAP_L3_IPV{4,6} to indicate the new IP header version after decapsulating the outer IP header.

Suggested-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Ziyang Xuan <william.xuanziyang@huawei.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Link: https://lore.kernel.org/r/b268ec7f0ff9431f4f43b1b40ab856ebb28cb4e1.1673574419.git.william.xuanziyang@huawei.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
-rw-r--r-- include/uapi/linux/bpf.h 7
-rw-r--r-- net/core/filter.c 31
-rw-r--r-- tools/include/uapi/linux/bpf.h 7
3 files changed, 44 insertions, 1 deletion
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bc1a3d232ae4..adae5b168f9d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2647,6 +2647,11 @@ union bpf_attr {
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
* L2 type as Ethernet.
*
+ * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ * Indicate the new IP header version after decapsulating the outer
+ * IP header. Used when the inner and outer IP versions are different.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -5807,6 +5812,8 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
};
enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index d9befa6ba04e..b4547a2c02f4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3381,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
BPF_F_ADJ_ROOM_ENCAP_L2( \
- BPF_ADJ_ROOM_ENCAP_L2_MASK))
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK)
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
@@ -3501,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
int ret;
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK |
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;
@@ -3519,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -3608,6 +3621,22 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ if (!shrink)
+ return -EINVAL;
+
+ switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+ len_min = sizeof(struct iphdr);
+ break;
+ case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+ len_min = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
len_cur = skb->len - skb_network_offset(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bc1a3d232ae4..142b81bcbb2e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2647,6 +2647,11 @@ union bpf_attr {
* Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
* L2 type as Ethernet.
*
+ * * **BPF_F_ADJ_ROOM_DECAP_L3_IPV4**,
+ * **BPF_F_ADJ_ROOM_DECAP_L3_IPV6**:
+ * Indicate the new IP header version after decapsulating the outer
+ * IP header. Used when the inner and outer IP versions are different.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -5807,6 +5812,8 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV4 = (1ULL << 7),
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV6 = (1ULL << 8),
};
enum {