diff options
Diffstat (limited to 'tools/testing/selftests/net/lib')
-rw-r--r-- | tools/testing/selftests/net/lib/.gitignore | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/Makefile | 1 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/ksft.h | 56 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/py/__init__.py | 2 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/py/ksft.py | 31 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/py/utils.py | 39 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/py/ynl.py | 9 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/xdp_helper.c | 131 | ||||
-rw-r--r-- | tools/testing/selftests/net/lib/xdp_native.bpf.c | 621 |
9 files changed, 884 insertions, 7 deletions
/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Patch context: the change that introduces this header also adds
 * "xdp_helper" to .gitignore and "TEST_GEN_FILES += xdp_helper" to the
 * Makefile in the same directory.
 */
#if !defined(__NET_KSFT_H__)
#define __NET_KSFT_H__

#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * Resolve the file descriptor named by environment variable @name.
 *
 * Returns @fallback when the variable is unset, the parsed descriptor when
 * it holds a positive decimal integer, and -1 (after printing a diagnostic
 * to stderr) otherwise. Zero is rejected on purpose: it is what a failed
 * numeric conversion looks like, and the callers never expect to be handed
 * fd 0 through the environment.
 */
static inline int ksft_env_fd(const char *name, int fallback)
{
	const char *env_str = getenv(name);
	char *end;
	long val;

	if (!env_str)
		return fallback;

	/* strtol() instead of atoi(): defined behaviour on overflow and
	 * a way to tell "no digits" / trailing garbage apart from a number.
	 */
	errno = 0;
	val = strtol(env_str, &end, 10);
	if (errno || end == env_str || *end != '\0' ||
	    val <= 0 || val > INT_MAX) {
		fprintf(stderr, "invalid %s = '%s'\n", name, env_str);
		return -1;
	}

	return (int)val;
}

/*
 * Signal readiness by writing "ready\n" to the descriptor given in
 * KSFT_READY_FD, or to stdout when the variable is unset.
 *
 * The whole 7-byte array is sent, i.e. the terminating NUL is included.
 * A descriptor taken from the environment is closed afterwards; stdout is
 * left open. Nothing is written if KSFT_READY_FD is set but invalid.
 */
static inline void ksft_ready(void)
{
	const char msg[7] = "ready\n";
	int fd;

	fd = ksft_env_fd("KSFT_READY_FD", STDOUT_FILENO);
	if (fd < 0)
		return;

	/* Report, rather than silently drop, a failed notification */
	if (write(fd, msg, sizeof(msg)) < 0)
		perror("write()");

	if (fd != STDOUT_FILENO)
		close(fd);
}

/*
 * Block until one byte can be read (or EOF/error is hit) on the descriptor
 * given in KSFT_WAIT_FD, or on stdin when the variable is unset.
 *
 * A descriptor taken from the environment is closed afterwards; stdin is
 * left open. Returns immediately if KSFT_WAIT_FD is set but invalid.
 */
static inline void ksft_wait(void)
{
	char byte;
	int fd;

	/* Not running in KSFT env: wait for input from STDIN instead */
	fd = ksft_env_fd("KSFT_WAIT_FD", STDIN_FILENO);
	if (fd < 0)
		return;

	if (read(fd, &byte, sizeof(byte)) < 0)
		perror("read()");

	if (fd != STDIN_FILENO)
		close(fd);
}

#endif
a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 8697bd27dc30..02be28dcc089 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -6,4 +6,4 @@ from .netns import NetNS, NetNSEnter from .nsim import * from .utils import * from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily -from .ynl import NetshaperFamily +from .ynl import NetshaperFamily, DevlinkFamily diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index 3cfad0fd4570..8e35ed12ed9e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -3,6 +3,7 @@ import builtins import functools import inspect +import signal import sys import time import traceback @@ -26,7 +27,12 @@ class KsftXfailEx(Exception): pass +class KsftTerminate(KeyboardInterrupt): + pass + + def ksft_pr(*objs, **kwargs): + kwargs["flush"] = True print("#", *objs, **kwargs) @@ -134,7 +140,7 @@ def ktap_result(ok, cnt=1, case="", comment=""): res += "." 
+ str(case.__name__) if comment: res += " # " + comment - print(res) + print(res, flush=True) def ksft_flush_defer(): @@ -193,6 +199,17 @@ def ksft_setup(env): return env +def _ksft_intr(signum, frame): + # ksft runner.sh sends 2 SIGTERMs in a row on a timeout + # if we don't ignore the second one it will stop us from handling cleanup + global term_cnt + term_cnt += 1 + if term_cnt == 1: + raise KsftTerminate() + else: + ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...") + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -205,10 +222,14 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases.append(value) break + global term_cnt + term_cnt = 0 + prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr) + totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0} - print("TAP version 13") - print("1.." + str(len(cases))) + print("TAP version 13", flush=True) + print("1.." + str(len(cases)), flush=True) global KSFT_RESULT cnt = 0 @@ -233,7 +254,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for line in tb.strip().split('\n'): ksft_pr("Exception|", line) if stop: - ksft_pr("Stopping tests due to KeyboardInterrupt.") + ksft_pr(f"Stopping tests due to {type(e).__name__}.") KSFT_RESULT = False cnt_key = 'fail' @@ -248,6 +269,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): if stop: break + signal.signal(signal.SIGTERM, prev_sigterm) + print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" ) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 34470d65d871..f395c90fb0f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -175,6 +175,10 @@ def tool(name, args, json=None, ns=None, host=None): return cmd_obj +def bpftool(args, json=None, ns=None, host=None): + return tool('bpftool', args, 
json=json, ns=ns, host=host) + + def ip(args, json=None, ns=None, host=None): if ns: args = f'-netns {ns} ' + args @@ -185,6 +189,41 @@ def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) +def bpftrace(expr, json=None, ns=None, host=None, timeout=None): + """ + Run bpftrace and return map data (if json=True). + The output of bpftrace is inconvenient, so the helper converts + to a dict indexed by map name, e.g.: + { + "@": { ... }, + "@map2": { ... }, + } + """ + cmd_arr = ['bpftrace'] + # Throw in --quiet if json, otherwise the output has two objects + if json: + cmd_arr += ['-f', 'json', '-q'] + if timeout: + expr += ' interval:s:' + str(timeout) + ' { exit(); }' + cmd_arr += ['-e', expr] + cmd_obj = cmd(cmd_arr, ns=ns, host=host, shell=False) + if json: + # bpftrace prints objects as lines + ret = {} + for l in cmd_obj.stdout.split('\n'): + if not l.strip(): + continue + one = _json.loads(l) + if one.get('type') != 'map': + continue + for k, v in one["data"].items(): + if k.startswith('@'): + k = k.lstrip('@') + ret[k] = v + return ret + return cmd_obj + + def rand_port(type=socket.SOCK_STREAM): """ Get a random unprivileged port. 
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py index 8986c584cb37..2b3a61ea3bfa 100644 --- a/tools/testing/selftests/net/lib/py/ynl.py +++ b/tools/testing/selftests/net/lib/py/ynl.py @@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily): class RtnlFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(), schema='', recv_size=recv_size) class RtnlAddrFamily(YnlFamily): def __init__(self, recv_size=0): - super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(), + super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(), schema='', recv_size=recv_size) class NetdevFamily(YnlFamily): @@ -56,3 +56,8 @@ class NetshaperFamily(YnlFamily): def __init__(self, recv_size=0): super().__init__((SPEC_PATH / Path('net_shaper.yaml')).as_posix(), schema='', recv_size=recv_size) + +class DevlinkFamily(YnlFamily): + def __init__(self, recv_size=0): + super().__init__((SPEC_PATH / Path('devlink.yaml')).as_posix(), + schema='', recv_size=recv_size) diff --git a/tools/testing/selftests/net/lib/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c new file mode 100644 index 000000000000..eb025a9f35b1 --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_helper.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <linux/if_xdp.h> +#include <linux/if_link.h> +#include <net/if.h> +#include <inttypes.h> + +#include "ksft.h" + +#define UMEM_SZ (1U << 16) +#define NUM_DESC (UMEM_SZ / 2048) + + +static void print_usage(const char *bin) +{ + fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n" + "where:\n\t-z: force zerocopy mode", bin); +} + +/* this is a simple helper program that creates an XDP socket and does the + * minimum necessary to 
get bind() to succeed. + * + * this test program is not intended to actually process packets, but could be + * extended in the future if that is actually needed. + * + * it is used by queues.py to ensure the xsk netlinux attribute is set + * correctly. + */ +int main(int argc, char **argv) +{ + struct xdp_umem_reg umem_reg = { 0 }; + struct sockaddr_xdp sxdp = { 0 }; + int num_desc = NUM_DESC; + void *umem_area; + int retry = 0; + int ifindex; + int sock_fd; + int queue; + + if (argc != 3 && argc != 4) { + print_usage(argv[0]); + return 1; + } + + sock_fd = socket(AF_XDP, SOCK_RAW, 0); + if (sock_fd < 0) { + perror("socket creation failed"); + /* if the kernel doesn't support AF_XDP, let the test program + * know with -1. All other error paths return 1. + */ + if (errno == EAFNOSUPPORT) + return -1; + return 1; + } + + /* "Probing mode", just checking if AF_XDP sockets are supported */ + if (!strcmp(argv[1], "-") && !strcmp(argv[2], "-")) { + printf("AF_XDP support detected\n"); + close(sock_fd); + return 0; + } + + ifindex = atoi(argv[1]); + queue = atoi(argv[2]); + + umem_area = mmap(NULL, UMEM_SZ, PROT_READ | PROT_WRITE, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (umem_area == MAP_FAILED) { + perror("mmap failed"); + return 1; + } + + umem_reg.addr = (uintptr_t)umem_area; + umem_reg.len = UMEM_SZ; + umem_reg.chunk_size = 2048; + umem_reg.headroom = 0; + + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, + sizeof(umem_reg)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_FILL_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &num_desc, + sizeof(num_desc)); + setsockopt(sock_fd, SOL_XDP, XDP_RX_RING, &num_desc, sizeof(num_desc)); + + sxdp.sxdp_family = AF_XDP; + sxdp.sxdp_ifindex = ifindex; + sxdp.sxdp_queue_id = queue; + sxdp.sxdp_flags = 0; + + if (argc > 3) { + if (!strcmp(argv[3], "-z")) { + sxdp.sxdp_flags = XDP_ZEROCOPY; + } else { + print_usage(argv[0]); + return 1; + } + } + + while (1) { + if (bind(sock_fd, 
(struct sockaddr *)&sxdp, sizeof(sxdp)) == 0) + break; + + if (errno == EBUSY && retry < 3) { + retry++; + sleep(1); + continue; + } else { + perror("bind failed"); + munmap(umem_area, UMEM_SZ); + close(sock_fd); + return 1; + } + } + + ksft_ready(); + ksft_wait(); + + /* parent program will write a byte to stdin when its ready for this + * helper to exit + */ + + close(sock_fd); + return 0; +} diff --git a/tools/testing/selftests/net/lib/xdp_native.bpf.c b/tools/testing/selftests/net/lib/xdp_native.bpf.c new file mode 100644 index 000000000000..521ba38f2ddd --- /dev/null +++ b/tools/testing/selftests/net/lib/xdp_native.bpf.c @@ -0,0 +1,621 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <stddef.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include <linux/udp.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +#define MAX_ADJST_OFFSET 256 +#define MAX_PAYLOAD_LEN 5000 +#define MAX_HDR_LEN 64 + +enum { + XDP_MODE = 0, + XDP_PORT = 1, + XDP_ADJST_OFFSET = 2, + XDP_ADJST_TAG = 3, +} xdp_map_setup_keys; + +enum { + XDP_MODE_PASS = 0, + XDP_MODE_DROP = 1, + XDP_MODE_TX = 2, + XDP_MODE_TAIL_ADJST = 3, + XDP_MODE_HEAD_ADJST = 4, +} xdp_map_modes; + +enum { + STATS_RX = 0, + STATS_PASS = 1, + STATS_DROP = 2, + STATS_TX = 3, + STATS_ABORT = 4, +} xdp_stats; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __s32); +} map_xdp_setup SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 5); + __type(key, __u32); + __type(value, __u64); +} map_xdp_stats SEC(".maps"); + +static __u32 min(__u32 a, __u32 b) +{ + return a < b ? 
a : b; +} + +static void record_stats(struct xdp_md *ctx, __u32 stat_type) +{ + __u64 *count; + + count = bpf_map_lookup_elem(&map_xdp_stats, &stat_type); + + if (count) + __sync_fetch_and_add(count, 1); +} + +static struct udphdr *filter_udphdr(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + } else { + return NULL; + } + + if (udph + 1 > (struct udphdr *)data_end) + return NULL; + + if (udph->dest != bpf_htons(port)) + return NULL; + + record_stats(ctx, STATS_RX); + + return udph; +} + +static int xdp_mode_pass(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_PASS); + + return XDP_PASS; +} + +static int xdp_mode_drop_handler(struct xdp_md *ctx, __u16 port) +{ + struct udphdr *udph = NULL; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + record_stats(ctx, STATS_DROP); + + return XDP_DROP; +} + +static void swap_machdr(void *data) +{ + struct ethhdr *eth = data; + __u8 tmp_mac[ETH_ALEN]; + + __builtin_memcpy(tmp_mac, eth->h_source, ETH_ALEN); + __builtin_memcpy(eth->h_source, eth->h_dest, ETH_ALEN); + __builtin_memcpy(eth->h_dest, tmp_mac, ETH_ALEN); +} + +static int xdp_mode_tx_handler(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void 
*)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + + if (data + sizeof(*eth) > data_end) + return XDP_PASS; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __be32 tmp_ip = iph->saddr; + + if (iph + 1 > (struct iphdr *)data_end || + iph->protocol != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*iph) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + iph->saddr = iph->daddr; + iph->daddr = tmp_ip; + + record_stats(ctx, STATS_TX); + + return XDP_TX; + + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + struct in6_addr tmp_ipv6; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end || + ipv6h->nexthdr != IPPROTO_UDP) + return XDP_PASS; + + udph = data + sizeof(*ipv6h) + sizeof(*eth); + + if (udph + 1 > (struct udphdr *)data_end) + return XDP_PASS; + if (udph->dest != bpf_htons(port)) + return XDP_PASS; + + record_stats(ctx, STATS_RX); + swap_machdr((void *)eth); + + __builtin_memcpy(&tmp_ipv6, &ipv6h->saddr, sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->saddr, &ipv6h->daddr, + sizeof(tmp_ipv6)); + __builtin_memcpy(&ipv6h->daddr, &tmp_ipv6, sizeof(tmp_ipv6)); + + record_stats(ctx, STATS_TX); + + return XDP_TX; + } + + return XDP_PASS; +} + +static void *update_pkt(struct xdp_md *ctx, __s16 offset, __u32 *udp_csum) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + struct ethhdr *eth = data; + __u32 len, len_new; + + if (data + sizeof(*eth) > data_end) + return NULL; + + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + struct iphdr *iph = data + sizeof(*eth); + __u16 total_len; + + if (iph + 1 > (struct iphdr *)data_end) + return NULL; + + iph->tot_len = 
bpf_htons(bpf_ntohs(iph->tot_len) + offset); + + udph = (void *)eth + sizeof(*iph) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + } else if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + struct ipv6hdr *ipv6h = data + sizeof(*eth); + __u16 payload_len; + + if (ipv6h + 1 > (struct ipv6hdr *)data_end) + return NULL; + + udph = (void *)eth + sizeof(*ipv6h) + sizeof(*eth); + if (!udph || udph + 1 > (struct udphdr *)data_end) + return NULL; + + *udp_csum = ~((__u32)udph->check); + + len = ipv6h->payload_len; + len_new = bpf_htons(bpf_ntohs(len) + offset); + ipv6h->payload_len = len_new; + + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + + len = udph->len; + len_new = bpf_htons(bpf_ntohs(udph->len) + offset); + *udp_csum = bpf_csum_diff(&len, sizeof(len), &len_new, + sizeof(len_new), *udp_csum); + } else { + return NULL; + } + + udph->len = len_new; + + return udph; +} + +static __u16 csum_fold_helper(__u32 csum) +{ + return ~((csum & 0xffff) + (csum >> 16)) ? : 0xffff; +} + +static int xdp_adjst_tail_shrnk_data(struct xdp_md *ctx, __u16 offset, + __u32 hdr_len) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + struct udphdr *udph = NULL; + __u32 buff_len; + + udph = update_pkt(ctx, 0 - offset, &udp_csum); + if (!udph) + return -1; + + buff_len = bpf_xdp_get_buff_len(ctx); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + /* Make sure we have enough data to avoid eating the header */ + if (buff_len - offset < hdr_len) + return -1; + + buff_pos = buff_len - offset; + if (bpf_xdp_load_bytes(ctx, buff_pos, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_adjust_tail(ctx, 0 - offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail_grow_data(struct xdp_md *ctx, __u16 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + __u32 buff_pos, udp_csum = 0; + __u32 buff_len, hdr_len, key; + struct udphdr *udph; + __s32 *val; + __u8 tag; + + /* Proceed to update the packet headers before attempting to adjuste + * the tail. Once the tail is adjusted we lose access to the offset + * amount of data at the end of the packet which is crucial to update + * the checksum. + * Since any failure beyond this would abort the packet, we should + * not worry about passing a packet up the stack with wrong headers + */ + udph = update_pkt(ctx, offset, &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&tmp_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)tmp_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + buff_len = bpf_xdp_get_buff_len(ctx); + + if (bpf_xdp_adjust_tail(ctx, offset) < 0) { + bpf_printk("Failed to adjust tail\n"); + return -1; + } + + if (bpf_xdp_store_bytes(ctx, buff_len, tmp_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_tail(struct xdp_md *ctx, __u16 port) +{ + void *data = (void *)(long)ctx->data; + struct udphdr *udph = NULL; + __s32 *adjust_offset, *val; + __u32 key, hdr_len; + void *offset_ptr; + __u8 tag; + int ret; + + udph = filter_udphdr(ctx, port); + if (!udph) + return XDP_PASS; + + hdr_len = (void *)udph - data + sizeof(struct udphdr); + key = XDP_ADJST_OFFSET; + adjust_offset = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!adjust_offset) + return XDP_PASS; + + if (*adjust_offset < 0) + ret = xdp_adjst_tail_shrnk_data(ctx, + (__u16)(0 - *adjust_offset), + hdr_len); + else + ret = xdp_adjst_tail_grow_data(ctx, (__u16)(*adjust_offset)); + if (ret) + goto abort_pkt; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort_pkt: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_adjst_head_shrnk_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char tmp_buff[MAX_ADJST_OFFSET]; + struct udphdr *udph; + void *offset_ptr; + __u32 udp_csum = 0; + + /* Update the length information in the IP and UDP headers before + * adjusting the headroom. This simplifies accessing the relevant + * fields in the IP and UDP headers for fragmented packets. Any + * failure beyond this point will result in the packet being aborted, + * so we don't need to worry about incorrect length information for + * passed packets. + */ + udph = update_pkt(ctx, (__s16)(0 - offset), &udp_csum); + if (!udph) + return -1; + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, hdr_len, tmp_buff, offset) < 0) + return -1; + + udp_csum = bpf_csum_diff((__be32 *)tmp_buff, offset, 0, 0, udp_csum); + + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (bpf_xdp_load_bytes(ctx, 0, tmp_buff, MAX_ADJST_OFFSET) < 0) + return -1; + + if (bpf_xdp_adjust_head(ctx, offset) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, tmp_buff, hdr_len) < 0) + return -1; + + return 0; +} + +static int xdp_adjst_head_grow_data(struct xdp_md *ctx, __u64 hdr_len, + __u32 offset) +{ + char hdr_buff[MAX_HDR_LEN]; + char data_buff[MAX_ADJST_OFFSET]; + void *offset_ptr; + __s32 *val; + __u32 key; + __u8 tag; + __u32 udp_csum = 0; + struct udphdr *udph; + + udph = update_pkt(ctx, (__s16)(offset), &udp_csum); + if (!udph) + return -1; + + key = XDP_ADJST_TAG; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return -1; + + tag = (__u8)(*val); + for (int i = 0; i < MAX_ADJST_OFFSET; i++) + __builtin_memcpy(&data_buff[i], &tag, 1); + + offset = (offset & 0x1ff) >= MAX_ADJST_OFFSET ? 
MAX_ADJST_OFFSET : + offset & 0xff; + if (offset == 0) + return -1; + + udp_csum = bpf_csum_diff(0, 0, (__be32 *)data_buff, offset, udp_csum); + udph->check = (__u16)csum_fold_helper(udp_csum); + + if (hdr_len > MAX_ADJST_OFFSET || hdr_len == 0) + return -1; + + /* Added here to handle clang complain about negative value */ + hdr_len = hdr_len & 0xff; + + if (hdr_len == 0) + return -1; + + if (bpf_xdp_load_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (offset > MAX_ADJST_OFFSET) + return -1; + + if (bpf_xdp_adjust_head(ctx, 0 - offset) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, 0, hdr_buff, hdr_len) < 0) + return -1; + + if (bpf_xdp_store_bytes(ctx, hdr_len, data_buff, offset) < 0) + return -1; + + return 0; +} + +static int xdp_head_adjst(struct xdp_md *ctx, __u16 port) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct udphdr *udph_ptr = NULL; + __u32 key, size, hdr_len; + __s32 *val; + int res; + + /* Filter packets based on UDP port */ + udph_ptr = filter_udphdr(ctx, port); + if (!udph_ptr) + return XDP_PASS; + + hdr_len = (void *)udph_ptr - data + sizeof(struct udphdr); + + key = XDP_ADJST_OFFSET; + val = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!val) + return XDP_PASS; + + switch (*val) { + case -16: + case 16: + size = 16; + break; + case -32: + case 32: + size = 32; + break; + case -64: + case 64: + size = 64; + break; + case -128: + case 128: + size = 128; + break; + case -256: + case 256: + size = 256; + break; + default: + bpf_printk("Invalid adjustment offset: %d\n", *val); + goto abort; + } + + if (*val < 0) + res = xdp_adjst_head_grow_data(ctx, hdr_len, size); + else + res = xdp_adjst_head_shrnk_data(ctx, hdr_len, size); + + if (res) + goto abort; + + record_stats(ctx, STATS_PASS); + return XDP_PASS; + +abort: + record_stats(ctx, STATS_ABORT); + return XDP_ABORTED; +} + +static int xdp_prog_common(struct xdp_md *ctx) +{ + __u32 key, *port; + __s32 *mode; + + key = XDP_MODE; + 
mode = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!mode) + return XDP_PASS; + + key = XDP_PORT; + port = bpf_map_lookup_elem(&map_xdp_setup, &key); + if (!port) + return XDP_PASS; + + switch (*mode) { + case XDP_MODE_PASS: + return xdp_mode_pass(ctx, (__u16)(*port)); + case XDP_MODE_DROP: + return xdp_mode_drop_handler(ctx, (__u16)(*port)); + case XDP_MODE_TX: + return xdp_mode_tx_handler(ctx, (__u16)(*port)); + case XDP_MODE_TAIL_ADJST: + return xdp_adjst_tail(ctx, (__u16)(*port)); + case XDP_MODE_HEAD_ADJST: + return xdp_head_adjst(ctx, (__u16)(*port)); + } + + /* Default action is to simple pass */ + return XDP_PASS; +} + +SEC("xdp") +int xdp_prog(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +SEC("xdp.frags") +int xdp_prog_frags(struct xdp_md *ctx) +{ + return xdp_prog_common(ctx); +} + +char _license[] SEC("license") = "GPL"; |