diff options
Diffstat (limited to 'tools')
87 files changed, 6618 insertions, 1069 deletions
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore index 26cde83e1ca3..3e601bcfd461 100644 --- a/tools/bpf/bpftool/.gitignore +++ b/tools/bpf/bpftool/.gitignore @@ -1,10 +1,11 @@ # SPDX-License-Identifier: GPL-2.0-only *.d -/_bpftool +/bpftool-bootstrap /bpftool bpftool*.8 bpf-helpers.* FEATURE-DUMP.bpftool feature libbpf -profiler.skel.h +/*.skel.h +/vmlinux.h diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst index ce3a724f50c1..896f4c6c2870 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst @@ -36,6 +36,11 @@ DESCRIPTION otherwise list all BTF objects currently loaded on the system. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BTF + objects. On such kernels bpftool will automatically emit this + information as well. + **bpftool btf dump** *BTF_SRC* Dump BTF entries from a given *BTF_SRC*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst index 0e43d7b06c11..38b0949a185b 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-link.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst @@ -37,6 +37,11 @@ DESCRIPTION zero or more named attributes, some of which depend on type of link. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + links. On such kernels bpftool will automatically emit this + information as well. + **bpftool link pin** *LINK* *FILE* Pin link *LINK* as *FILE*. @@ -82,6 +87,7 @@ EXAMPLES 10: cgroup prog 25 cgroup_id 614 attach_type egress + pids test_progs(223) **# bpftool --json --pretty link show** @@ -91,7 +97,12 @@ EXAMPLES "type": "cgroup", "prog_id": 25, "cgroup_id": 614, - "attach_type": "egress" + "attach_type": "egress", + "pids": [{ + "pid": 223, + "comm": "test_progs" + } + ] } ] diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 70c78faa47ab..41e2a74252d0 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -62,6 +62,11 @@ DESCRIPTION Output will start with map ID followed by map type and zero or more named attributes (depending on kernel version). + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + maps. On such kernels bpftool will automatically emit this + information as well. + **bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*] Create a new map with given parameters and pin it to *bpffs* as *FILE*. @@ -180,7 +185,8 @@ EXAMPLES :: 10: hash name some_map flags 0x0 - key 4B value 8B max_entries 2048 memlock 167936B + key 4B value 8B max_entries 2048 memlock 167936B + pids systemd(1) The following three commands are equivalent: diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 2b254959d488..412ea3d9bf7f 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -75,6 +75,11 @@ DESCRIPTION program run. Activation or deactivation of the feature is performed via the **kernel.bpf_stats_enabled** sysctl knob. + Since Linux 5.8 bpftool is able to discover information about + processes that hold open file descriptors (FDs) against BPF + programs. On such kernels bpftool will automatically emit this + information as well. + **bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] Dump eBPF instructions of the programs from the kernel. By default, eBPF will be disassembled and printed to standard @@ -243,6 +248,7 @@ EXAMPLES 10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10 loaded_at 2017-09-29T20:11:00+0000 uid 0 xlated 528B jited 370B memlock 4096B map_ids 10 + pids systemd(1) **# bpftool --json --pretty prog show** @@ -262,6 +268,11 @@ EXAMPLES "bytes_jited": 370, "bytes_memlock": 4096, "map_ids": [10 + ], + "pids": [{ + "pid": 1, + "comm": "systemd" + } ] } ] diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile index 9e85f101be85..51bd520ed437 100644 --- a/tools/bpf/bpftool/Makefile +++ b/tools/bpf/bpftool/Makefile @@ -40,8 +40,9 @@ bash_compdir ?= /usr/share/bash-completion/completions CFLAGS += -O2 CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers -CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS)) +CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS)) CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \ + -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/kernel/bpf/ \ -I$(srctree)/tools/include \ -I$(srctree)/tools/include/uapi \ @@ -61,9 +62,9 @@ CLANG ?= clang FEATURE_USER = .bpftool FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \ - clang-bpf-global-var + clang-bpf-co-re FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \ - clang-bpf-global-var + clang-bpf-co-re check_feat := 1 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall @@ -116,40 +117,60 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT SRCS += $(BFD_SRCS) endif +BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap) + +BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o) OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o -_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o -ifeq ($(feature-clang-bpf-global-var),1) - __OBJS = $(OBJS) -else - __OBJS = $(_OBJS) -endif +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ + $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ + ../../../vmlinux \ + /sys/kernel/btf/vmlinux \ + /boot/vmlinux-$(shell uname -r) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) -$(OUTPUT)_prog.o: prog.c - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $< +ifneq ($(VMLINUX_BTF)$(VMLINUX_H),) +ifeq ($(feature-clang-bpf-co-re),1) -$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS) +BUILD_BPF_SKELS := 1 -skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF) +$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP) +ifeq ($(VMLINUX_H),) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@ +else + $(Q)cp "$(VMLINUX_H)" $@ +endif + +$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF) $(QUIET_CLANG)$(CLANG) \ + -I$(if $(OUTPUT),$(OUTPUT),.) \ -I$(srctree)/tools/include/uapi/ \ - -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \ + -I$(LIBBPF_PATH) \ + -I$(srctree)/tools/lib \ -g -O2 -target bpf -c $< -o $@ -profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o - $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@ +$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP) + $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@ -$(OUTPUT)prog.o: prog.c profiler.skel.h - $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< +$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h + +$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h + +endif +endif + +CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS) $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< $(OUTPUT)feature.o: | zdep -$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS) +$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS) + +$(OUTPUT)bpftool: $(OBJS) $(LIBBPF) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS) $(OUTPUT)%.o: %.c $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $< @@ -157,7 +178,7 @@ $(OUTPUT)%.o: %.c clean: $(LIBBPF)-clean $(call QUIET_CLEAN, bpftool) $(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d - $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o + $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h $(Q)$(RM) -r -- $(OUTPUT)libbpf/ $(call QUIET_CLEAN, core-gen) $(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool @@ -192,6 +213,7 @@ FORCE: zdep: @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi +.SECONDARY: .PHONY: all FORCE clean install uninstall zdep .PHONY: doc doc-clean doc-install doc-uninstall .DEFAULT_GOAL := all diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c index faac8189b285..fc9bc7a23db6 100644 --- a/tools/bpf/bpftool/btf.c +++ b/tools/bpf/bpftool/btf.c @@ -809,6 +809,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd, printf("%s%u", n++ == 0 ? " map_ids " : ",", obj->obj_id); } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); } @@ -841,6 +842,9 @@ show_btf_json(struct bpf_btf_info *info, int fd, jsonw_uint(json_wtr, obj->obj_id); } jsonw_end_array(json_wtr); /* map_ids */ + + emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */ + jsonw_end_object(json_wtr); /* btf object */ } @@ -893,6 +897,7 @@ static int do_show(int argc, char **argv) close(fd); return err; } + build_obj_refs_table(&refs_table, BPF_OBJ_BTF); if (fd >= 0) { err = show_btf(fd, &btf_prog_table, &btf_map_table); @@ -939,6 +944,7 @@ static int do_show(int argc, char **argv) exit_free: delete_btf_table(&btf_prog_table); delete_btf_table(&btf_map_table); + delete_obj_refs_table(&refs_table); return err; } diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c index c47bdc65de8e..18e5604fe260 100644 --- a/tools/bpf/bpftool/common.c +++ b/tools/bpf/bpftool/common.c @@ -29,6 +29,42 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { + [BPF_CGROUP_INET_INGRESS] = "ingress", + [BPF_CGROUP_INET_EGRESS] = "egress", + [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", + [BPF_CGROUP_SOCK_OPS] = "sock_ops", + [BPF_CGROUP_DEVICE] = "device", + [BPF_CGROUP_INET4_BIND] = "bind4", + [BPF_CGROUP_INET6_BIND] = "bind6", + [BPF_CGROUP_INET4_CONNECT] = "connect4", + [BPF_CGROUP_INET6_CONNECT] = "connect6", + [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", + [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", + [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", + [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", + [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", + [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", + [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", + [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", + [BPF_CGROUP_SYSCTL] = "sysctl", + [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", + [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", + [BPF_CGROUP_GETSOCKOPT] = "getsockopt", + [BPF_CGROUP_SETSOCKOPT] = "setsockopt", + + [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", + [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", + [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", + [BPF_LIRC_MODE2] = "lirc_mode2", + [BPF_FLOW_DISSECTOR] = "flow_dissector", + [BPF_TRACE_RAW_TP] = "raw_tp", + [BPF_TRACE_FENTRY] = "fentry", + [BPF_TRACE_FEXIT] = "fexit", + [BPF_MODIFY_RETURN] = "mod_ret", + [BPF_LSM_MAC] = "lsm_mac", +}; + void p_err(const char *fmt, ...) { va_list ap; @@ -581,3 +617,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level, { return vfprintf(stderr, format, args); } + +static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) +{ + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_prog_info info = {}; + __u32 len = sizeof(info); + + err = bpf_prog_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_prog_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get prog by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get prog info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || + (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +int prog_parse_fds(int *argc, char ***argv, int **fds) +{ + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + (*fds)[0] = bpf_prog_get_fd_by_id(id); + if ((*fds)[0] < 0) { + p_err("get by id (%u): %s", id, strerror(errno)); + return -1; + } + return 1; + } else if (is_prefix(**argv, "tag")) { + unsigned char tag[BPF_TAG_SIZE]; + + NEXT_ARGP(); + + if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, + tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) + != BPF_TAG_SIZE) { + p_err("can't parse tag"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(tag, fds, true); + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(name, fds, false); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + if ((*fds)[0] < 0) + return -1; + return 1; + } + + p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); + return -1; +} + +int prog_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several programs match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + +static int map_fd_by_name(char *name, int **fds) +{ + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get map info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} + +int map_parse_fds(int *argc, char ***argv, int **fds) +{ + if (is_prefix(**argv, "id")) { + unsigned int id; + char *endptr; + + NEXT_ARGP(); + + id = strtoul(**argv, &endptr, 0); + if (*endptr) { + p_err("can't parse %s as ID", **argv); + return -1; + } + NEXT_ARGP(); + + (*fds)[0] = bpf_map_get_fd_by_id(id); + if ((*fds)[0] < 0) { + p_err("get map by id (%u): %s", id, strerror(errno)); + return -1; + } + return 1; + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return map_fd_by_name(name, fds); + } else if (is_prefix(**argv, "pinned")) { + char *path; + + NEXT_ARGP(); + + path = **argv; + NEXT_ARGP(); + + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + if ((*fds)[0] < 0) + return -1; + return 1; + } + + p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); + return -1; +} + +int map_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several maps match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + +int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) +{ + int err; + int fd; + + fd = map_parse_fd(argc, argv); + if (fd < 0) + return -1; + + err = bpf_obj_get_info_by_fd(fd, info, info_len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + close(fd); + return err; + } + + return fd; +} diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c index 768bf77df886..1cd75807673e 100644 --- a/tools/bpf/bpftool/feature.c +++ b/tools/bpf/bpftool/feature.c @@ -695,7 +695,7 @@ section_program_types(bool *supported_types, const char *define_prefix, "/*** eBPF program types ***/", define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) probe_prog_type(i, supported_types, define_prefix, ifindex); print_end_section(); @@ -741,7 +741,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex) " %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n", define_prefix, define_prefix, define_prefix, define_prefix); - for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++) + for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++) probe_helpers_for_progtype(i, supported_types[i], define_prefix, ifindex); diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index fca57ee8fafe..326b8fdf0243 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -108,7 +108,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < ARRAY_SIZE(prog_type_name)) + if (prog_info.type < prog_type_name_size) jsonw_string_field(json_wtr, "prog_type", prog_type_name[prog_info.type]); else @@ -143,6 +143,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info) } jsonw_end_array(json_wtr); } + + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); return 0; @@ -184,7 +187,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) if (err) return err; - if (prog_info.type < ARRAY_SIZE(prog_type_name)) + if (prog_info.type < prog_type_name_size) printf("\n\tprog_type %s ", prog_type_name[prog_info.type]); else @@ -212,6 +215,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info) printf("\n\tpinned %s", obj->path); } } + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); printf("\n"); @@ -257,6 +261,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&link_table, BPF_OBJ_LINK); + build_obj_refs_table(&refs_table, BPF_OBJ_LINK); if (argc == 2) { fd = link_parse_fd(&argc, &argv); @@ -296,6 +301,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 46bd716a9d86..4a191fcbeb82 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -31,6 +31,7 @@ bool relaxed_maps; struct pinned_obj_table prog_table; struct pinned_obj_table map_table; struct pinned_obj_table link_table; +struct obj_refs_table refs_table; static void __noreturn clean_and_exit(int i) { @@ -92,9 +93,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv, if (argc < 1 && cmds[0].func) return cmds[0].func(argc, argv); - for (i = 0; cmds[i].func; i++) - if (is_prefix(*argv, cmds[i].cmd)) + for (i = 0; cmds[i].cmd; i++) { + if (is_prefix(*argv, cmds[i].cmd)) { + if (!cmds[i].func) { + p_err("command '%s' is not supported in bootstrap mode", + cmds[i].cmd); + return -1; + } return cmds[i].func(argc - 1, argv + 1); + } + } help(argc - 1, argv + 1); diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 5cdf0bc049bd..78d34e860713 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -56,82 +56,21 @@ #define HELP_SPEC_LINK \ "LINK := { id LINK_ID | pinned FILE }" -static const char * const prog_type_name[] = { - [BPF_PROG_TYPE_UNSPEC] = "unspec", - [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", - [BPF_PROG_TYPE_KPROBE] = "kprobe", - [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", - [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", - [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", - [BPF_PROG_TYPE_XDP] = "xdp", - [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", - [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", - [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", - [BPF_PROG_TYPE_LWT_IN] = "lwt_in", - [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", - [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", - [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", - [BPF_PROG_TYPE_SK_SKB] = "sk_skb", - [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", - [BPF_PROG_TYPE_SK_MSG] = "sk_msg", - [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", - [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", - [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", - [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", - [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", - [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", - [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", - [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", - [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", - [BPF_PROG_TYPE_TRACING] = "tracing", - [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", - [BPF_PROG_TYPE_EXT] = "ext", -}; +extern const char * const prog_type_name[]; +extern const size_t prog_type_name_size; -static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = { - [BPF_CGROUP_INET_INGRESS] = "ingress", - [BPF_CGROUP_INET_EGRESS] = "egress", - [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create", - [BPF_CGROUP_SOCK_OPS] = "sock_ops", - [BPF_CGROUP_DEVICE] = "device", - [BPF_CGROUP_INET4_BIND] = "bind4", - [BPF_CGROUP_INET6_BIND] = "bind6", - [BPF_CGROUP_INET4_CONNECT] = "connect4", - [BPF_CGROUP_INET6_CONNECT] = "connect6", - [BPF_CGROUP_INET4_POST_BIND] = "post_bind4", - [BPF_CGROUP_INET6_POST_BIND] = "post_bind6", - [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4", - [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6", - [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4", - [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6", - [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4", - [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6", - [BPF_CGROUP_SYSCTL] = "sysctl", - [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4", - [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6", - [BPF_CGROUP_GETSOCKOPT] = "getsockopt", - [BPF_CGROUP_SETSOCKOPT] = "setsockopt", - - [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser", - [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict", - [BPF_SK_MSG_VERDICT] = "sk_msg_verdict", - [BPF_LIRC_MODE2] = "lirc_mode2", - [BPF_FLOW_DISSECTOR] = "flow_dissector", - [BPF_TRACE_RAW_TP] = "raw_tp", - [BPF_TRACE_FENTRY] = "fentry", - [BPF_TRACE_FEXIT] = "fexit", - [BPF_MODIFY_RETURN] = "mod_ret", - [BPF_LSM_MAC] = "lsm_mac", -}; +extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE]; extern const char * const map_type_name[]; extern const size_t map_type_name_size; +/* keep in sync with the definition in skeleton/pid_iter.bpf.c */ enum bpf_obj_type { BPF_OBJ_UNKNOWN, BPF_OBJ_PROG, BPF_OBJ_MAP, BPF_OBJ_LINK, + BPF_OBJ_BTF, }; extern const char *bin_name; @@ -139,12 +78,14 @@ extern const char *bin_name; extern json_writer_t *json_wtr; extern bool json_output; extern bool show_pinned; +extern bool show_pids; extern bool block_mount; extern bool verifier_logs; extern bool relaxed_maps; extern struct pinned_obj_table prog_table; extern struct pinned_obj_table map_table; extern struct pinned_obj_table link_table; +extern struct obj_refs_table refs_table; void __printf(1, 2) p_err(const char *fmt, ...); void __printf(1, 2) p_info(const char *fmt, ...); @@ -168,12 +109,35 @@ struct pinned_obj { struct hlist_node hash; }; +struct obj_refs_table { + DECLARE_HASHTABLE(table, 16); +}; + +struct obj_ref { + int pid; + char comm[16]; +}; + +struct obj_refs { + struct hlist_node node; + __u32 id; + int ref_cnt; + struct obj_ref *refs; +}; + struct btf; struct bpf_line_info; int build_pinned_obj_table(struct pinned_obj_table *table, enum bpf_obj_type type); void delete_pinned_obj_table(struct pinned_obj_table *tab); +__weak int build_obj_refs_table(struct obj_refs_table *table, + enum bpf_obj_type type); +__weak void delete_obj_refs_table(struct obj_refs_table *table); +__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, + json_writer_t *json_wtr); +__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, + const char *prefix); void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode); @@ -194,23 +158,28 @@ int mount_bpffs_for_pin(const char *name); int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***)); int do_pin_fd(int fd, const char *name); -int do_prog(int argc, char **arg); -int do_map(int argc, char **arg); -int do_link(int argc, char **arg); -int do_event_pipe(int argc, char **argv); -int do_cgroup(int argc, char **arg); -int do_perf(int argc, char **arg); -int do_net(int argc, char **arg); -int do_tracelog(int argc, char **arg); -int do_feature(int argc, char **argv); -int do_btf(int argc, char **argv); +/* commands available in bootstrap mode */ int do_gen(int argc, char **argv); -int do_struct_ops(int argc, char **argv); -int do_iter(int argc, char **argv); +int do_btf(int argc, char **argv); + +/* non-bootstrap only commands */ +int do_prog(int argc, char **arg) __weak; +int do_map(int argc, char **arg) __weak; +int do_link(int argc, char **arg) __weak; +int do_event_pipe(int argc, char **argv) __weak; +int do_cgroup(int argc, char **arg) __weak; +int do_perf(int argc, char **arg) __weak; +int do_net(int argc, char **arg) __weak; +int do_tracelog(int argc, char **arg) __weak; +int do_feature(int argc, char **argv) __weak; +int do_struct_ops(int argc, char **argv) __weak; +int do_iter(int argc, char **argv) __weak; int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); +int prog_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd(int *argc, char ***argv); +int map_parse_fds(int *argc, char ***argv, int **fds); int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len); struct bpf_prog_linfo; diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index 1d3b60651078..3a27d31a1856 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c @@ -93,162 +93,6 @@ static void *alloc_value(struct bpf_map_info *info) return malloc(info->value_size); } -static int map_fd_by_name(char *name, int **fds) -{ - unsigned int id = 0; - int fd, nb_fds = 0; - void *tmp; - int err; - - while (true) { - struct bpf_map_info info = {}; - __u32 len = sizeof(info); - - err = bpf_map_get_next_id(id, &id); - if (err) { - if (errno != ENOENT) { - p_err("%s", strerror(errno)); - goto err_close_fds; - } - return nb_fds; - } - - fd = bpf_map_get_fd_by_id(id); - if (fd < 0) { - p_err("can't get map by id (%u): %s", - id, strerror(errno)); - goto err_close_fds; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get map info (%u): %s", - id, strerror(errno)); - goto err_close_fd; - } - - if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { - close(fd); - continue; - } - - if (nb_fds > 0) { - tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); - if (!tmp) { - p_err("failed to realloc"); - goto err_close_fd; - } - *fds = tmp; - } - (*fds)[nb_fds++] = fd; - } - -err_close_fd: - close(fd); -err_close_fds: - while (--nb_fds >= 0) - close((*fds)[nb_fds]); - return -1; -} - -static int map_parse_fds(int *argc, char ***argv, int **fds) -{ - if (is_prefix(**argv, "id")) { - unsigned int id; - char *endptr; - - NEXT_ARGP(); - - id = strtoul(**argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", **argv); - return -1; - } - NEXT_ARGP(); - - (*fds)[0] = bpf_map_get_fd_by_id(id); - if ((*fds)[0] < 0) { - p_err("get map by id (%u): %s", id, strerror(errno)); - return -1; - } - return 1; - } else if (is_prefix(**argv, "name")) { - char *name; - - NEXT_ARGP(); - - name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { - p_err("can't parse name"); - return -1; - } - NEXT_ARGP(); - - return map_fd_by_name(name, fds); - } else if (is_prefix(**argv, "pinned")) { - char *path; - - NEXT_ARGP(); - - path = **argv; - NEXT_ARGP(); - - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); - if ((*fds)[0] < 0) - return -1; - return 1; - } - - p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); - return -1; -} - -int map_parse_fd(int *argc, char ***argv) -{ - int *fds = NULL; - int nb_fds, fd; - - fds = malloc(sizeof(int)); - if (!fds) { - p_err("mem alloc failed"); - return -1; - } - nb_fds = map_parse_fds(argc, argv, &fds); - if (nb_fds != 1) { - if (nb_fds > 1) { - p_err("several maps match this handle"); - while (nb_fds--) - close(fds[nb_fds]); - } - fd = -1; - goto exit_free; - } - - fd = fds[0]; -exit_free: - free(fds); - return fd; -} - -int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) -{ - int err; - int fd; - - fd = map_parse_fd(argc, argv); - if (fd < 0) - return -1; - - err = bpf_obj_get_info_by_fd(fd, info, info_len); - if (err) { - p_err("can't get map info: %s", strerror(errno)); - close(fd); - return err; - } - - return fd; -} - static int do_dump_btf(const struct btf_dumper *d, struct bpf_map_info *map_info, void *key, void *value) @@ -629,7 +473,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); - if (prog_type < ARRAY_SIZE(prog_type_name)) + if (prog_type < prog_type_name_size) jsonw_string_field(json_wtr, "owner_prog_type", prog_type_name[prog_type]); else @@ -666,6 +510,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_end_array(json_wtr); } + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); return 0; @@ -712,7 +558,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (owner_prog_type) { unsigned int prog_type = atoi(owner_prog_type); - if (prog_type < ARRAY_SIZE(prog_type_name)) + if (prog_type < prog_type_name_size) printf("owner_prog_type %s ", prog_type_name[prog_type]); else @@ -753,6 +599,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) if (frozen) printf("%sfrozen", info->btf_id ? " " : ""); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + printf("\n"); return 0; } @@ -811,6 +659,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&map_table, BPF_OBJ_MAP); + build_obj_refs_table(&refs_table, BPF_OBJ_MAP); if (argc == 2) return do_show_subset(argc, argv); @@ -854,6 +703,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return errno == ENOENT ? 0 : -1; } diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c new file mode 100644 index 000000000000..7d5416667c85 --- /dev/null +++ b/tools/bpf/bpftool/pids.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2020 Facebook */ +#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <bpf/bpf.h> + +#include "main.h" +#include "skeleton/pid_iter.h" + +#ifdef BPFTOOL_WITHOUT_SKELETONS + +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +{ + p_err("bpftool built without PID iterator support"); + return -ENOTSUP; +} +void delete_obj_refs_table(struct obj_refs_table *table) {} +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {} + +#else /* BPFTOOL_WITHOUT_SKELETONS */ + +#include "pid_iter.skel.h" + +static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e) +{ + struct obj_refs *refs; + struct obj_ref *ref; + void *tmp; + int i; + + hash_for_each_possible(table->table, refs, node, e->id) { + if (refs->id != e->id) + continue; + + for (i = 0; i < refs->ref_cnt; i++) { + if (refs->refs[i].pid == e->pid) + return; + } + + tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref)); + if (!tmp) { + p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + refs->refs = tmp; + ref = &refs->refs[refs->ref_cnt]; + ref->pid = e->pid; + memcpy(ref->comm, e->comm, sizeof(ref->comm)); + refs->ref_cnt++; + + return; + } + + /* new ref */ + refs = calloc(1, sizeof(*refs)); + if (!refs) { + p_err("failed to alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + + refs->id = e->id; + refs->refs = malloc(sizeof(*refs->refs)); + if (!refs->refs) { + free(refs); + p_err("failed to alloc memory for ID %u, PID %d, COMM %s...", + e->id, e->pid, e->comm); + return; + } + ref = &refs->refs[0]; + ref->pid = e->pid; + memcpy(ref->comm, e->comm, sizeof(ref->comm)); + refs->ref_cnt = 1; + hash_add(table->table, &refs->node, e->id); +} + +static int __printf(2, 0) +libbpf_print_none(__maybe_unused enum libbpf_print_level level, + __maybe_unused const char *format, + __maybe_unused va_list args) +{ + return 0; +} + +int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) +{ + char buf[4096]; + struct pid_iter_bpf *skel; + struct pid_iter_entry *e; + int err, ret, fd = -1, i; + libbpf_print_fn_t default_print; + + hash_init(table->table); + set_max_rlimit(); + + skel = pid_iter_bpf__open(); + if (!skel) { + p_err("failed to open PID iterator skeleton"); + return -1; + } + + skel->rodata->obj_type = type; + + /* we don't want output polluted with libbpf errors if bpf_iter is not + * supported + */ + default_print = libbpf_set_print(libbpf_print_none); + err = pid_iter_bpf__load(skel); + libbpf_set_print(default_print); + if (err) { + /* too bad, kernel doesn't support BPF iterators yet */ + err = 0; + goto out; + } + err = pid_iter_bpf__attach(skel); + if (err) { + /* if we loaded above successfully, attach has to succeed */ + p_err("failed to attach PID iterator: %d", err); + goto out; + } + + fd = bpf_iter_create(bpf_link__fd(skel->links.iter)); + if (fd < 0) { + err = -errno; + p_err("failed to create PID iterator session: %d", err); + goto out; + } + + while (true) { + ret = read(fd, buf, sizeof(buf)); + if (ret < 0) { + err = -errno; + p_err("failed to read PID iterator output: %d", err); + goto out; + } + if (ret == 0) + break; + if (ret % sizeof(*e)) { + err = -EINVAL; + p_err("invalid PID iterator output format"); + goto out; + } + ret /= sizeof(*e); + + e = (void *)buf; + for (i = 0; i < ret; i++, e++) { + add_ref(table, e); + } + } + err = 0; +out: + if (fd >= 0) + close(fd); + pid_iter_bpf__destroy(skel); + return err; +} + +void delete_obj_refs_table(struct obj_refs_table *table) +{ + struct obj_refs *refs; + struct hlist_node *tmp; + unsigned int bkt; + + hash_for_each_safe(table->table, bkt, tmp, refs, node) { + hash_del(&refs->node); + free(refs->refs); + free(refs); + } +} + +void emit_obj_refs_json(struct obj_refs_table *table, __u32 id, + json_writer_t *json_writer) +{ + struct obj_refs *refs; + struct obj_ref *ref; + int i; + + if (hash_empty(table->table)) + return; + + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; + if (refs->ref_cnt == 0) + break; + + jsonw_name(json_writer, "pids"); + jsonw_start_array(json_writer); + for (i = 0; i < refs->ref_cnt; i++) { + ref = &refs->refs[i]; + jsonw_start_object(json_writer); + jsonw_int_field(json_writer, "pid", ref->pid); + jsonw_string_field(json_writer, "comm", ref->comm); + jsonw_end_object(json_writer); + } + jsonw_end_array(json_writer); + break; + } +} + +void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) +{ + struct obj_refs *refs; + struct obj_ref *ref; + int i; + + if (hash_empty(table->table)) + return; + + hash_for_each_possible(table->table, refs, node, id) { + if (refs->id != id) + continue; + if (refs->ref_cnt == 0) + break; + + printf("%s", prefix); + for (i = 0; i < refs->ref_cnt; i++) { + ref = &refs->refs[i]; + printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid); + } + break; + } +} + + +#endif diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index a5eff83496f2..6863c57effd0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -29,6 +29,40 @@ #include "main.h" #include "xlated_dumper.h" +const char * const prog_type_name[] = { + [BPF_PROG_TYPE_UNSPEC] = "unspec", + [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter", + [BPF_PROG_TYPE_KPROBE] = "kprobe", + [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls", + [BPF_PROG_TYPE_SCHED_ACT] = "sched_act", + [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint", + [BPF_PROG_TYPE_XDP] = "xdp", + [BPF_PROG_TYPE_PERF_EVENT] = "perf_event", + [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb", + [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock", + [BPF_PROG_TYPE_LWT_IN] = "lwt_in", + [BPF_PROG_TYPE_LWT_OUT] = "lwt_out", + [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit", + [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops", + [BPF_PROG_TYPE_SK_SKB] = "sk_skb", + [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device", + [BPF_PROG_TYPE_SK_MSG] = "sk_msg", + [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint", + [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr", + [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local", + [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2", + [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport", + [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector", + [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl", + [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable", + [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt", + [BPF_PROG_TYPE_TRACING] = "tracing", + [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops", + [BPF_PROG_TYPE_EXT] = "ext", +}; + +const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name); + enum dump_mode { DUMP_JITED, DUMP_XLATED, @@ -86,158 +120,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) strftime(buf, size, "%FT%T%z", &load_tm); } -static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) -{ - unsigned int id = 0; - int fd, nb_fds = 0; - void *tmp; - int err; - - while (true) { - struct bpf_prog_info info = {}; - __u32 len = sizeof(info); - - err = bpf_prog_get_next_id(id, &id); - if (err) { - if (errno != ENOENT) { - p_err("%s", strerror(errno)); - goto err_close_fds; - } - return nb_fds; - } - - fd = bpf_prog_get_fd_by_id(id); - if (fd < 0) { - p_err("can't get prog by id (%u): %s", - id, strerror(errno)); - goto err_close_fds; - } - - err = bpf_obj_get_info_by_fd(fd, &info, &len); - if (err) { - p_err("can't get prog info (%u): %s", - id, strerror(errno)); - goto err_close_fd; - } - - if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || - (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { - close(fd); - continue; - } - - if (nb_fds > 0) { - tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); - if (!tmp) { - p_err("failed to realloc"); - goto err_close_fd; - } - *fds = tmp; - } - (*fds)[nb_fds++] = fd; - } - -err_close_fd: - close(fd); -err_close_fds: - while (--nb_fds >= 0) - close((*fds)[nb_fds]); - return -1; -} - -static int prog_parse_fds(int *argc, char ***argv, int **fds) -{ - if (is_prefix(**argv, "id")) { - unsigned int id; - char *endptr; - - NEXT_ARGP(); - - id = strtoul(**argv, &endptr, 0); - if (*endptr) { - p_err("can't parse %s as ID", **argv); - return -1; - } - NEXT_ARGP(); - - (*fds)[0] = bpf_prog_get_fd_by_id(id); - if ((*fds)[0] < 0) { - p_err("get by id (%u): %s", id, strerror(errno)); - return -1; - } - return 1; - } else if (is_prefix(**argv, "tag")) { - unsigned char tag[BPF_TAG_SIZE]; - - NEXT_ARGP(); - - if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2, - tag + 3, tag + 4, tag + 5, tag + 6, tag + 7) - != BPF_TAG_SIZE) { - p_err("can't parse tag"); - return -1; - } - NEXT_ARGP(); - - return prog_fd_by_nametag(tag, fds, true); - } else if (is_prefix(**argv, "name")) { - char *name; - - NEXT_ARGP(); - - name = **argv; - if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { - p_err("can't parse name"); - return -1; - } - NEXT_ARGP(); - - return prog_fd_by_nametag(name, fds, false); - } else if (is_prefix(**argv, "pinned")) { - char *path; - - NEXT_ARGP(); - - path = **argv; - NEXT_ARGP(); - - (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); - if ((*fds)[0] < 0) - return -1; - return 1; - } - - p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); - return -1; -} - -int prog_parse_fd(int *argc, char ***argv) -{ - int *fds = NULL; - int nb_fds, fd; - - fds = malloc(sizeof(int)); - if (!fds) { - p_err("mem alloc failed"); - return -1; - } - nb_fds = prog_parse_fds(argc, argv, &fds); - if (nb_fds != 1) { - if (nb_fds > 1) { - p_err("several programs match this handle"); - while (nb_fds--) - close(fds[nb_fds]); - } - fd = -1; - goto exit_free; - } - - fd = fds[0]; -exit_free: - free(fds); - return fd; -} - static void show_prog_maps(int fd, __u32 num_maps) { struct bpf_prog_info info = {}; @@ -342,6 +224,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_array(json_wtr); } + emit_obj_refs_json(&refs_table, info->id, json_wtr); + jsonw_end_object(json_wtr); } @@ -408,6 +292,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) if (info->btf_id) printf("\n\tbtf_id %d", info->btf_id); + emit_obj_refs_plain(&refs_table, info->id, "\n\tpids "); + printf("\n"); } @@ -473,6 +359,7 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); + build_obj_refs_table(&refs_table, BPF_OBJ_PROG); if (argc == 2) return do_show_subset(argc, argv); @@ -514,6 +401,8 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); + delete_obj_refs_table(&refs_table); + return err; } diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c new file mode 100644 index 000000000000..8468a608911e --- /dev/null +++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (c) 2020 Facebook */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_tracing.h> +#include "pid_iter.h" + +/* keep in sync with the definition in main.h */ +enum bpf_obj_type { + BPF_OBJ_UNKNOWN, + BPF_OBJ_PROG, + BPF_OBJ_MAP, + BPF_OBJ_LINK, + BPF_OBJ_BTF, +}; + +extern const void bpf_link_fops __ksym; +extern const void bpf_map_fops __ksym; +extern const void bpf_prog_fops __ksym; +extern const void btf_fops __ksym; + +const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN; + +static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type) +{ + switch (type) { + case BPF_OBJ_PROG: + return BPF_CORE_READ((struct bpf_prog *)ent, aux, id); + case BPF_OBJ_MAP: + return BPF_CORE_READ((struct bpf_map *)ent, id); + case BPF_OBJ_BTF: + return BPF_CORE_READ((struct btf *)ent, id); + case BPF_OBJ_LINK: + return BPF_CORE_READ((struct bpf_link *)ent, id); + default: + return 0; + } +} + +SEC("iter/task_file") +int iter(struct bpf_iter__task_file *ctx) +{ + struct file *file = ctx->file; + struct task_struct *task = ctx->task; + struct pid_iter_entry e; + const void *fops; + + if (!file || !task) + return 0; + + switch (obj_type) { + case BPF_OBJ_PROG: + fops = &bpf_prog_fops; + break; + case BPF_OBJ_MAP: + fops = &bpf_map_fops; + break; + case BPF_OBJ_BTF: + fops = &btf_fops; + break; + case BPF_OBJ_LINK: + fops = &bpf_link_fops; + break; + default: + return 0; + } + + if (file->f_op != fops) + return 0; + + e.pid = task->tgid; + e.id = get_obj_id(file->private_data, obj_type); + bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm); + bpf_seq_write(ctx->meta->seq, &e, sizeof(e)); + + return 0; +} + +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h new file mode 100644 index 000000000000..5692cf257adb --- /dev/null +++ b/tools/bpf/bpftool/skeleton/pid_iter.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ +/* Copyright (c) 2020 Facebook */ +#ifndef __PID_ITER_H +#define __PID_ITER_H + +struct pid_iter_entry { + __u32 id; + int pid; + char comm[16]; +}; + +#endif diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c index 20034c12f7c5..4e3512f700c0 100644 --- a/tools/bpf/bpftool/skeleton/profiler.bpf.c +++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c @@ -1,7 +1,6 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) // Copyright (c) 2020 Facebook -#include "profiler.h" -#include <linux/bpf.h> +#include <vmlinux.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> @@ -116,4 +115,4 @@ int BPF_PROG(fexit_XXX) return 0; } -char LICENSE[] SEC("license") = "GPL"; +char LICENSE[] SEC("license") = "Dual BSD/GPL"; diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h deleted file mode 100644 index 1f767e9510f7..000000000000 --- a/tools/bpf/bpftool/skeleton/profiler.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#ifndef __PROFILER_H -#define __PROFILER_H - -/* useful typedefs from vmlinux.h */ - -typedef signed char __s8; -typedef unsigned char __u8; -typedef short int __s16; -typedef short unsigned int __u16; -typedef int __s32; -typedef unsigned int __u32; -typedef long long int __s64; -typedef long long unsigned int __u64; - -typedef __s8 s8; -typedef __u8 u8; -typedef __s16 s16; -typedef __u16 u16; -typedef __s32 s32; -typedef __u32 u32; -typedef __s64 s64; -typedef __u64 u64; - -enum { - false = 0, - true = 1, -}; - -#ifdef __CHECKER__ -#define __bitwise__ __attribute__((bitwise)) -#else -#define __bitwise__ -#endif - -typedef __u16 __bitwise__ __le16; -typedef __u16 __bitwise__ __be16; -typedef __u32 __bitwise__ __le32; -typedef __u32 __bitwise__ __be32; -typedef __u64 __bitwise__ __le64; -typedef __u64 __bitwise__ __be64; - -typedef __u16 __bitwise__ __sum16; -typedef __u32 __bitwise__ __wsum; - -#endif /* __PROFILER_H */ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index b1f0321180f5..88371f7f0369 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -68,7 +68,7 @@ FILES= \ test-llvm-version.bin \ test-libaio.bin \ test-libzstd.bin \ - test-clang-bpf-global-var.bin \ + test-clang-bpf-co-re.bin \ test-file-handle.bin \ test-libpfm4.bin @@ -325,7 +325,7 @@ $(OUTPUT)test-libaio.bin: $(OUTPUT)test-libzstd.bin: $(BUILD) -lzstd -$(OUTPUT)test-clang-bpf-global-var.bin: +$(OUTPUT)test-clang-bpf-co-re.bin: $(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \ grep BTF_KIND_VAR diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c new file mode 100644 index 000000000000..cb5265bfdd83 --- /dev/null +++ b/tools/build/feature/test-clang-bpf-co-re.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +struct test { + int a; + int b; +} __attribute__((preserve_access_index)); + +volatile struct test global_value_for_test = {}; diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c deleted file mode 100644 index 221f1481d52e..000000000000 --- a/tools/build/feature/test-clang-bpf-global-var.c +++ /dev/null @@ -1,4 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (c) 2020 Facebook - -volatile int global_value_for_test = 1; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 8bd33050b7bb..ea382e47e400 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -653,7 +653,7 @@ union bpf_attr { * Map value associated to *key*, or **NULL** if no entry was * found. * - * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) + * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags) * Description * Add or update the value of the entry associated to *key* in * *map* with *value*. *flags* is one of: @@ -671,13 +671,13 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_delete_elem(struct bpf_map *map, const void *key) + * long bpf_map_delete_elem(struct bpf_map *map, const void *key) * Description * Delete entry with *key* from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr) * Description * For tracing programs, safely attempt to read *size* bytes from * kernel space address *unsafe_ptr* and store the data in *dst*. @@ -695,7 +695,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...) + * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...) * Description * This helper is a "printk()-like" facility for debugging. It * prints a message defined by format *fmt* (of size *fmt_size*) @@ -775,7 +775,7 @@ union bpf_attr { * Return * The SMP id of the processor running the program. * - * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) + * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. *flags* are a combination of @@ -792,7 +792,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) + * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size) * Description * Recompute the layer 3 (e.g. IP) checksum for the packet * associated to *skb*. Computation is incremental, so the helper @@ -817,7 +817,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) + * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags) * Description * Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the * packet associated to *skb*. Computation is incremental, so the @@ -849,7 +849,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) + * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index) * Description * This special helper is used to trigger a "tail call", or in * other words, to jump into another eBPF program. The same stack @@ -880,7 +880,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) + * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags) * Description * Clone and redirect the packet associated to *skb* to another * net device of index *ifindex*. Both ingress and egress @@ -916,7 +916,7 @@ union bpf_attr { * A 64-bit integer containing the current GID and UID, and * created as such: *current_gid* **<< 32 \|** *current_uid*. * - * int bpf_get_current_comm(void *buf, u32 size_of_buf) + * long bpf_get_current_comm(void *buf, u32 size_of_buf) * Description * Copy the **comm** attribute of the current task into *buf* of * *size_of_buf*. The **comm** attribute contains the name of @@ -953,7 +953,7 @@ union bpf_attr { * Return * The classid, or 0 for the default unconfigured classid. * - * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) + * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) * Description * Push a *vlan_tci* (VLAN tag control information) of protocol * *vlan_proto* to the packet associated to *skb*, then update @@ -969,7 +969,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_vlan_pop(struct sk_buff *skb) + * long bpf_skb_vlan_pop(struct sk_buff *skb) * Description * Pop a VLAN header from the packet associated to *skb*. * @@ -981,7 +981,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Get tunnel metadata. This helper takes a pointer *key* to an * empty **struct bpf_tunnel_key** of **size**, that will be @@ -1032,7 +1032,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) + * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags) * Description * Populate tunnel metadata for packet associated to *skb.* The * tunnel metadata is set to the contents of *key*, of *size*. The @@ -1098,7 +1098,7 @@ union bpf_attr { * The value of the perf event counter read from the map, or a * negative error code in case of failure. * - * int bpf_redirect(u32 ifindex, u64 flags) + * long bpf_redirect(u32 ifindex, u64 flags) * Description * Redirect the packet to another net device of index *ifindex*. * This helper is somewhat similar to **bpf_clone_redirect**\ @@ -1145,7 +1145,7 @@ union bpf_attr { * The realm of the route for the packet associated to *skb*, or 0 * if none was found. * - * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -1190,7 +1190,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) + * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len) * Description * This helper was provided as an easy way to load data from a * packet. It can be used to load *len* bytes from *offset* from @@ -1207,7 +1207,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) + * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags) * Description * Walk a user or a kernel stack and return its id. To achieve * this, the helper needs *ctx*, which is a pointer to the context @@ -1276,7 +1276,7 @@ union bpf_attr { * The checksum result, or a negative error code in case of * failure. * - * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Retrieve tunnel options metadata for the packet associated to * *skb*, and store the raw tunnel option data to the buffer *opt* @@ -1294,7 +1294,7 @@ union bpf_attr { * Return * The size of the option data retrieved. * - * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) + * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size) * Description * Set tunnel options metadata for the packet associated to *skb* * to the option data contained in the raw buffer *opt* of *size*. @@ -1304,7 +1304,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) + * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags) * Description * Change the protocol of the *skb* to *proto*. Currently * supported are transition from IPv4 to IPv6, and from IPv6 to @@ -1331,7 +1331,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_change_type(struct sk_buff *skb, u32 type) + * long bpf_skb_change_type(struct sk_buff *skb, u32 type) * Description * Change the packet type for the packet associated to *skb*. This * comes down to setting *skb*\ **->pkt_type** to *type*, except @@ -1358,7 +1358,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) + * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index) * Description * Check whether *skb* is a descendant of the cgroup2 held by * *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*. @@ -1389,7 +1389,7 @@ union bpf_attr { * Return * A pointer to the current task struct. * - * int bpf_probe_write_user(void *dst, const void *src, u32 len) + * long bpf_probe_write_user(void *dst, const void *src, u32 len) * Description * Attempt in a safe way to write *len* bytes from the buffer * *src* to *dst* in memory. It only works for threads that are in @@ -1408,7 +1408,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) + * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index) * Description * Check whether the probe is being run is the context of a given * subset of the cgroup2 hierarchy. The cgroup2 to test is held by @@ -1420,7 +1420,7 @@ union bpf_attr { * * 1, if the *skb* task does not belong to the cgroup2. * * A negative error code, if an error occurred. * - * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags) * Description * Resize (trim or grow) the packet associated to *skb* to the * new *len*. The *flags* are reserved for future usage, and must @@ -1444,7 +1444,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_pull_data(struct sk_buff *skb, u32 len) + * long bpf_skb_pull_data(struct sk_buff *skb, u32 len) * Description * Pull in non-linear data in case the *skb* is non-linear and not * all of *len* are part of the linear section. Make *len* bytes @@ -1500,7 +1500,7 @@ union bpf_attr { * recalculation the next time the kernel tries to access this * hash or when the **bpf_get_hash_recalc**\ () helper is called. * - * int bpf_get_numa_node_id(void) + * long bpf_get_numa_node_id(void) * Description * Return the id of the current NUMA node. The primary use case * for this helper is the selection of sockets for the local NUMA @@ -1511,7 +1511,7 @@ union bpf_attr { * Return * The id of current NUMA node. * - * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) + * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags) * Description * Grows headroom of packet associated to *skb* and adjusts the * offset of the MAC header accordingly, adding *len* bytes of @@ -1532,7 +1532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that * it is possible to use a negative value for *delta*. This helper @@ -1547,7 +1547,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address * *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for @@ -1595,14 +1595,14 @@ union bpf_attr { * is returned (note that **overflowuid** might also be the actual * UID value for the socket). * - * u32 bpf_set_hash(struct sk_buff *skb, u32 hash) + * long bpf_set_hash(struct sk_buff *skb, u32 hash) * Description * Set the full hash for *skb* (set the field *skb*\ **->hash**) * to value *hash*. * Return * 0 * - * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1621,16 +1621,19 @@ union bpf_attr { * * * **SOL_SOCKET**, which supports the following *optname*\ s: * **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**, - * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**. + * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**, + * **SO_BINDTODEVICE**, **SO_KEEPALIVE**. * * **IPPROTO_TCP**, which supports the following *optname*\ s: * **TCP_CONGESTION**, **TCP_BPF_IW**, - * **TCP_BPF_SNDCWND_CLAMP**. + * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**, + * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**, + * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**. * * **IPPROTO_IP**, which supports *optname* **IP_TOS**. * * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) + * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags) * Description * Grow or shrink the room for data in the packet associated to * *skb* by *len_diff*, and according to the selected *mode*. @@ -1676,7 +1679,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) + * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the endpoint referenced by *map* at * index *key*. Depending on its type, this *map* can contain @@ -1697,7 +1700,7 @@ union bpf_attr { * **XDP_REDIRECT** on success, or the value of the two lower bits * of the *flags* argument on error. * - * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) + * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags) * Description * Redirect the packet to the socket referenced by *map* (of type * **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and @@ -1708,7 +1711,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a *map* referencing sockets. The * *skops* is used as a new value for the entry associated to @@ -1727,7 +1730,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta) * Description * Adjust the address pointed by *xdp_md*\ **->data_meta** by * *delta* (which can be positive or negative). Note that this @@ -1756,7 +1759,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size) * Description * Read the value of a perf event counter, and store it into *buf* * of size *buf_size*. This helper relies on a *map* of type @@ -1806,7 +1809,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) + * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size) * Description * For en eBPF program attached to a perf event, retrieve the * value of the event counter associated to *ctx* and store it in @@ -1817,7 +1820,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) + * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1842,7 +1845,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_override_return(struct pt_regs *regs, u64 rc) + * long bpf_override_return(struct pt_regs *regs, u64 rc) * Description * Used for error injection, this helper uses kprobes to override * the return value of the probed function, and to set it to *rc*. @@ -1867,7 +1870,7 @@ union bpf_attr { * Return * 0 * - * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) + * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval) * Description * Attempt to set the value of the **bpf_sock_ops_cb_flags** field * for the full TCP socket associated to *bpf_sock_ops* to @@ -1911,7 +1914,7 @@ union bpf_attr { * be set is returned (which comes down to 0 if all bits were set * as required). * - * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) + * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -1925,7 +1928,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, apply the verdict of the eBPF program to * the next *bytes* (number of bytes) of message *msg*. @@ -1959,7 +1962,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) + * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes) * Description * For socket policies, prevent the execution of the verdict eBPF * program for message *msg* until *bytes* (byte number) have been @@ -1977,7 +1980,7 @@ union bpf_attr { * Return * 0 * - * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) + * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags) * Description * For socket policies, pull in non-linear data from user space * for *msg* and set pointers *msg*\ **->data** and *msg*\ @@ -2008,7 +2011,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) + * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len) * Description * Bind the socket associated to *ctx* to the address pointed by * *addr*, of length *addr_len*. This allows for making outgoing @@ -2026,7 +2029,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) + * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta) * Description * Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is * possible to both shrink and grow the packet tail. @@ -2040,7 +2043,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) + * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags) * Description * Retrieve the XFRM state (IP transform framework, see also * **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*. @@ -2056,7 +2059,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) + * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags) * Description * Return a user or a kernel stack in bpf program provided buffer. * To achieve this, the helper needs *ctx*, which is a pointer @@ -2089,7 +2092,7 @@ union bpf_attr { * A non-negative value equal to or less than *size* on success, * or a negative error in case of failure. * - * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) + * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header) * Description * This helper is similar to **bpf_skb_load_bytes**\ () in that * it provides an easy way to load *len* bytes from *offset* @@ -2111,7 +2114,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) + * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags) * Description * Do FIB lookup in kernel tables using parameters in *params*. * If lookup is successful and result shows packet is to be @@ -2142,7 +2145,7 @@ union bpf_attr { * * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the * packet is not forwarded or needs assist from full stack * - * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) + * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags) * Description * Add an entry to, or update a sockhash *map* referencing sockets. * The *skops* is used as a new value for the entry associated to @@ -2161,7 +2164,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) + * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * socket level. If the message *msg* is allowed to pass (i.e. if @@ -2175,7 +2178,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags) * Description * This helper is used in programs implementing policies at the * skb socket level. If the sk_buff *skb* is allowed to pass (i.e. @@ -2189,7 +2192,7 @@ union bpf_attr { * Return * **SK_PASS** on success, or **SK_DROP** on error. * - * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) + * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len) * Description * Encapsulate the packet associated to *skb* within a Layer 3 * protocol header. This header is provided in the buffer at @@ -2226,7 +2229,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) + * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len) * Description * Store *len* bytes from address *from* into the packet * associated to *skb*, at *offset*. Only the flags, tag and TLVs @@ -2241,7 +2244,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) + * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta) * Description * Adjust the size allocated to TLVs in the outermost IPv6 * Segment Routing Header contained in the packet associated to @@ -2257,7 +2260,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) + * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len) * Description * Apply an IPv6 Segment Routing action of type *action* to the * packet associated to *skb*. Each action takes a parameter @@ -2286,7 +2289,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_repeat(void *ctx) + * long bpf_rc_repeat(void *ctx) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded repeat key message. This delays @@ -2305,7 +2308,7 @@ union bpf_attr { * Return * 0 * - * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) + * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded key press with *scancode*, @@ -2370,7 +2373,7 @@ union bpf_attr { * Return * A pointer to the local storage area. * - * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) + * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags) * Description * Select a **SO_REUSEPORT** socket from a * **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*. @@ -2471,7 +2474,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_sk_release(struct bpf_sock *sock) + * long bpf_sk_release(struct bpf_sock *sock) * Description * Release the reference held by *sock*. *sock* must be a * non-**NULL** pointer that was returned from @@ -2479,7 +2482,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) + * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags) * Description * Push an element *value* in *map*. *flags* is one of: * @@ -2489,19 +2492,19 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_pop_elem(struct bpf_map *map, void *value) + * long bpf_map_pop_elem(struct bpf_map *map, void *value) * Description * Pop an element from *map*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_map_peek_elem(struct bpf_map *map, void *value) + * long bpf_map_peek_elem(struct bpf_map *map, void *value) * Description * Get an element from *map* without removing it. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * For socket policies, insert *len* bytes into *msg* at offset * *start*. @@ -2517,7 +2520,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) + * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags) * Description * Will remove *len* bytes from a *msg* starting at byte *start*. * This may result in **ENOMEM** errors under certain situations if @@ -2529,7 +2532,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) + * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y) * Description * This helper is used in programs implementing IR decoding, to * report a successfully decoded pointer movement. @@ -2543,7 +2546,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_lock(struct bpf_spin_lock *lock) + * long bpf_spin_lock(struct bpf_spin_lock *lock) * Description * Acquire a spinlock represented by the pointer *lock*, which is * stored as part of a value of a map. Taking the lock allows to @@ -2591,7 +2594,7 @@ union bpf_attr { * Return * 0 * - * int bpf_spin_unlock(struct bpf_spin_lock *lock) + * long bpf_spin_unlock(struct bpf_spin_lock *lock) * Description * Release the *lock* previously locked by a call to * **bpf_spin_lock**\ (\ *lock*\ ). @@ -2614,7 +2617,7 @@ union bpf_attr { * A **struct bpf_tcp_sock** pointer on success, or **NULL** in * case of failure. * - * int bpf_skb_ecn_set_ce(struct sk_buff *skb) + * long bpf_skb_ecn_set_ce(struct sk_buff *skb) * Description * Set ECN (Explicit Congestion Notification) field of IP header * to **CE** (Congestion Encountered) if current value is **ECT** @@ -2651,7 +2654,7 @@ union bpf_attr { * result is from *reuse*\ **->socks**\ [] using the hash of the * tuple. * - * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) + * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len) * Description * Check whether *iph* and *th* contain a valid SYN cookie ACK for * the listening socket in *sk*. @@ -2666,7 +2669,7 @@ union bpf_attr { * 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative * error otherwise. * - * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) * Description * Get name of sysctl in /proc/sys/ and copy it into provided by * program buffer *buf* of size *buf_len*. @@ -2682,7 +2685,7 @@ union bpf_attr { * **-E2BIG** if the buffer wasn't big enough (*buf* will contain * truncated name in this case). * - * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get current value of sysctl as it is presented in /proc/sys * (incl. newline, etc), and copy it as a string into provided @@ -2701,7 +2704,7 @@ union bpf_attr { * **-EINVAL** if current value was unavailable, e.g. because * sysctl is uninitialized and read returns -EIO for it. * - * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) + * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len) * Description * Get new value being written by user space to sysctl (before * the actual write happens) and copy it as a string into @@ -2718,7 +2721,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) + * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len) * Description * Override new value being written by user space to sysctl with * value provided by program in buffer *buf* of size *buf_len*. @@ -2735,7 +2738,7 @@ union bpf_attr { * * **-EINVAL** if sysctl is being read. * - * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) + * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to a long integer according to the given base @@ -2759,7 +2762,7 @@ union bpf_attr { * * **-ERANGE** if resulting value was out of range. * - * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) + * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res) * Description * Convert the initial part of the string from buffer *buf* of * size *buf_len* to an unsigned long integer according to the @@ -2810,7 +2813,7 @@ union bpf_attr { * **NULL** if not found or there was an error in adding * a new bpf-local-storage. * - * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) + * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk) * Description * Delete a bpf-local-storage from a *sk*. * Return @@ -2818,7 +2821,7 @@ union bpf_attr { * * **-ENOENT** if the bpf-local-storage cannot be found. * - * int bpf_send_signal(u32 sig) + * long bpf_send_signal(u32 sig) * Description * Send signal *sig* to the process of the current task. * The signal may be delivered to any of this process's threads. @@ -2859,7 +2862,7 @@ union bpf_attr { * * **-EPROTONOSUPPORT** IP packet version is not 4 or 6 * - * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -2883,21 +2886,21 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from user space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr) * Description * Safely attempt to read *size* bytes from kernel space address * *unsafe_ptr* and store the data in *dst*. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe user address * *unsafe_ptr* to *dst*. The *size* should include the @@ -2941,7 +2944,7 @@ union bpf_attr { * including the trailing NUL character. On error, a negative * value. * - * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) + * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr) * Description * Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr* * to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply. @@ -2949,14 +2952,14 @@ union bpf_attr { * On success, the strictly positive length of the string, including * the trailing NUL character. On error, a negative value. * - * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt) + * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt) * Description * Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**. * *rcv_nxt* is the ack_seq to be sent out. * Return * 0 on success, or a negative error in case of failure. * - * int bpf_send_signal_thread(u32 sig) + * long bpf_send_signal_thread(u32 sig) * Description * Send signal *sig* to the thread corresponding to the current task. * Return @@ -2976,7 +2979,7 @@ union bpf_attr { * Return * The 64 bit jiffies * - * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) + * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags) * Description * For an eBPF program attached to a perf event, retrieve the * branch records (**struct perf_branch_entry**) associated to *ctx* @@ -2995,7 +2998,7 @@ union bpf_attr { * * **-ENOENT** if architecture does not support branch records. * - * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) + * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size) * Description * Returns 0 on success, values for *pid* and *tgid* as seen from the current * *namespace* will be returned in *nsdata*. @@ -3007,7 +3010,7 @@ union bpf_attr { * * **-ENOENT** if pidns does not exists for the current task. * - * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) + * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size) * Description * Write raw *data* blob into a special BPF perf event held by * *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf @@ -3062,7 +3065,7 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) + * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags) * Description * Assign the *sk* to the *skb*. When combined with appropriate * routing configuration to receive the packet towards the socket, @@ -3097,7 +3100,7 @@ union bpf_attr { * Return * Current *ktime*. * - * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) + * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len) * Description * **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print * out the format string. @@ -3126,7 +3129,7 @@ union bpf_attr { * * **-EOVERFLOW** if an overflow happened: The same object will be tried again. * - * int bpf_seq_write(struct seq_file *m, const void *data, u32 len) + * long bpf_seq_write(struct seq_file *m, const void *data, u32 len) * Description * **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data. * The *m* represents the seq_file. The *data* and *len* represent the @@ -3222,7 +3225,7 @@ union bpf_attr { * Return * Requested value, or 0, if *flags* are not recognized. * - * int bpf_csum_level(struct sk_buff *skb, u64 level) + * long bpf_csum_level(struct sk_buff *skb, u64 level) * Description * Change the skbs checksum level by one layer up or down, or * reset it entirely to none in order to have the stack perform @@ -3253,6 +3256,69 @@ union bpf_attr { * case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level * is returned or the error code -EACCES in case the skb is not * subject to CHECKSUM_UNNECESSARY. + * + * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk) + * Description + * Dynamically cast a *sk* pointer to a *udp6_sock* pointer. + * Return + * *sk* if casting is valid, or NULL otherwise. + * + * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags) + * Description + * Return a user or a kernel stack in bpf program provided buffer. + * To achieve this, the helper needs *task*, which is a valid + * pointer to struct task_struct. To store the stacktrace, the + * bpf program provides *buf* with a nonnegative *size*. + * + * The last argument, *flags*, holds the number of stack frames to + * skip (from 0 to 255), masked with + * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set + * the following flags: + * + * **BPF_F_USER_STACK** + * Collect a user space stack instead of a kernel stack. + * **BPF_F_USER_BUILD_ID** + * Collect buildid+offset instead of ips for user stack, + * only valid if **BPF_F_USER_STACK** is also specified. + * + * **bpf_get_task_stack**\ () can collect up to + * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject + * to sufficient large buffer size. Note that + * this limit can be controlled with the **sysctl** program, and + * that it should be manually increased in order to profile long + * user stacks (such as stacks for Java programs). To do so, use: + * + * :: + * + * # sysctl kernel.perf_event_max_stack=<new value> + * Return + * A non-negative value equal to or less than *size* on success, + * or a negative error in case of failure. + * */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3390,7 +3456,14 @@ union bpf_attr { FN(ringbuf_submit), \ FN(ringbuf_discard), \ FN(ringbuf_query), \ - FN(csum_level), + FN(csum_level), \ + FN(skc_to_tcp6_sock), \ + FN(skc_to_tcp_sock), \ + FN(skc_to_tcp_timewait_sock), \ + FN(skc_to_tcp_request_sock), \ + FN(skc_to_udp6_sock), \ + FN(get_task_stack), \ + /* */ /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 7009dc90e012..eae5cccff761 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -217,7 +217,7 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ_INTO(dst, src, a, ...) \ ({ \ - ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \ + ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \ }) /* @@ -227,7 +227,7 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \ ({ \ - ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ + ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\ }) /* @@ -254,8 +254,8 @@ enum bpf_field_info_kind { */ #define BPF_CORE_READ(src, a, ...) \ ({ \ - ___type(src, a, ##__VA_ARGS__) __r; \ - BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \ + ___type((src), a, ##__VA_ARGS__) __r; \ + BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \ __r; \ }) diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h index fbe28008450f..ec9db4feca9f 100644 --- a/tools/lib/bpf/bpf_endian.h +++ b/tools/lib/bpf/bpf_endian.h @@ -2,8 +2,35 @@ #ifndef __BPF_ENDIAN__ #define __BPF_ENDIAN__ -#include <linux/stddef.h> -#include <linux/swab.h> +/* + * Isolate byte #n and put it into byte #m, for __u##b type. + * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64: + * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx + * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000 + * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn + * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000 + */ +#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8)) + +#define ___bpf_swab16(x) ((__u16)( \ + ___bpf_mvb(x, 16, 0, 1) | \ + ___bpf_mvb(x, 16, 1, 0))) + +#define ___bpf_swab32(x) ((__u32)( \ + ___bpf_mvb(x, 32, 0, 3) | \ + ___bpf_mvb(x, 32, 1, 2) | \ + ___bpf_mvb(x, 32, 2, 1) | \ + ___bpf_mvb(x, 32, 3, 0))) + +#define ___bpf_swab64(x) ((__u64)( \ + ___bpf_mvb(x, 64, 0, 7) | \ + ___bpf_mvb(x, 64, 1, 6) | \ + ___bpf_mvb(x, 64, 2, 5) | \ + ___bpf_mvb(x, 64, 3, 4) | \ + ___bpf_mvb(x, 64, 4, 3) | \ + ___bpf_mvb(x, 64, 5, 2) | \ + ___bpf_mvb(x, 64, 6, 1) | \ + ___bpf_mvb(x, 64, 7, 0))) /* LLVM's BPF target selects the endianness of the CPU * it compiles on, or the user specifies (bpfel/bpfeb), @@ -23,16 +50,16 @@ #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ # define __bpf_ntohs(x) __builtin_bswap16(x) # define __bpf_htons(x) __builtin_bswap16(x) -# define __bpf_constant_ntohs(x) ___constant_swab16(x) -# define __bpf_constant_htons(x) ___constant_swab16(x) +# define __bpf_constant_ntohs(x) ___bpf_swab16(x) +# define __bpf_constant_htons(x) ___bpf_swab16(x) # define __bpf_ntohl(x) __builtin_bswap32(x) # define __bpf_htonl(x) __builtin_bswap32(x) -# define __bpf_constant_ntohl(x) ___constant_swab32(x) -# define __bpf_constant_htonl(x) ___constant_swab32(x) +# define __bpf_constant_ntohl(x) ___bpf_swab32(x) +# define __bpf_constant_htonl(x) ___bpf_swab32(x) # define __bpf_be64_to_cpu(x) __builtin_bswap64(x) # define __bpf_cpu_to_be64(x) __builtin_bswap64(x) -# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x) -# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x) +# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x) +# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x) #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ # define __bpf_ntohs(x) (x) # define __bpf_htons(x) (x) diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index f67dce2af802..a510d8ed716f 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -75,5 +75,6 @@ enum libbpf_tristate { }; #define __kconfig __attribute__((section(".kconfig"))) +#define __ksym __attribute__((section(".ksyms"))) #endif diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 70c1b7ec2bd0..06cd1731c154 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -168,6 +168,11 @@ static inline bool btf_kflag(const struct btf_type *t) return BTF_INFO_KFLAG(t->info); } +static inline bool btf_is_void(const struct btf_type *t) +{ + return btf_kind(t) == BTF_KIND_UNKN; +} + static inline bool btf_is_int(const struct btf_type *t) { return btf_kind(t) == BTF_KIND_INT; diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 11e4725b8b1c..51fd9b6c6a77 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -230,6 +230,7 @@ struct bpf_program { struct bpf_insn *insns; size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; + bool load; struct reloc_desc *reloc_desc; int nr_reloc; @@ -285,6 +286,7 @@ struct bpf_struct_ops { #define BSS_SEC ".bss" #define RODATA_SEC ".rodata" #define KCONFIG_SEC ".kconfig" +#define KSYMS_SEC ".ksyms" #define STRUCT_OPS_SEC ".struct_ops" enum libbpf_map_type { @@ -310,6 +312,7 @@ struct bpf_map { int map_ifindex; int inner_map_fd; struct bpf_map_def def; + __u32 numa_node; __u32 btf_var_idx; __u32 btf_key_type_id; __u32 btf_value_type_id; @@ -329,24 +332,39 @@ struct bpf_map { enum extern_type { EXT_UNKNOWN, - EXT_CHAR, - EXT_BOOL, - EXT_INT, - EXT_TRISTATE, - EXT_CHAR_ARR, + EXT_KCFG, + EXT_KSYM, +}; + +enum kcfg_type { + KCFG_UNKNOWN, + KCFG_CHAR, + KCFG_BOOL, + KCFG_INT, + KCFG_TRISTATE, + KCFG_CHAR_ARR, }; struct extern_desc { - const char *name; + enum extern_type type; int sym_idx; int btf_id; - enum extern_type type; - int sz; - int align; - int data_off; - bool is_signed; - bool is_weak; + int sec_btf_id; + const char *name; bool is_set; + bool is_weak; + union { + struct { + enum kcfg_type type; + int sz; + int align; + int data_off; + bool is_signed; + } kcfg; + struct { + unsigned long long addr; + } ksym; + }; }; static LIST_HEAD(bpf_objects_list); @@ -524,6 +542,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, prog->instances.fds = NULL; prog->instances.nr = -1; prog->type = BPF_PROG_TYPE_UNSPEC; + prog->load = true; return 0; errout: @@ -1423,19 +1442,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, return NULL; } -static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, - char value) +static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val, + char value) { - switch (ext->type) { - case EXT_BOOL: + switch (ext->kcfg.type) { + case KCFG_BOOL: if (value == 'm') { - pr_warn("extern %s=%c should be tristate or char\n", + pr_warn("extern (kcfg) %s=%c should be tristate or char\n", ext->name, value); return -EINVAL; } *(bool *)ext_val = value == 'y' ? true : false; break; - case EXT_TRISTATE: + case KCFG_TRISTATE: if (value == 'y') *(enum libbpf_tristate *)ext_val = TRI_YES; else if (value == 'm') @@ -1443,14 +1462,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, else /* value == 'n' */ *(enum libbpf_tristate *)ext_val = TRI_NO; break; - case EXT_CHAR: + case KCFG_CHAR: *(char *)ext_val = value; break; - case EXT_UNKNOWN: - case EXT_INT: - case EXT_CHAR_ARR: + case KCFG_UNKNOWN: + case KCFG_INT: + case KCFG_CHAR_ARR: default: - pr_warn("extern %s=%c should be bool, tristate, or char\n", + pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n", ext->name, value); return -EINVAL; } @@ -1458,29 +1477,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, return 0; } -static int set_ext_value_str(struct extern_desc *ext, char *ext_val, - const char *value) +static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val, + const char *value) { size_t len; - if (ext->type != EXT_CHAR_ARR) { - pr_warn("extern %s=%s should char array\n", ext->name, value); + if (ext->kcfg.type != KCFG_CHAR_ARR) { + pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value); return -EINVAL; } len = strlen(value); if (value[len - 1] != '"') { - pr_warn("extern '%s': invalid string config '%s'\n", + pr_warn("extern (kcfg) '%s': invalid string config '%s'\n", ext->name, value); return -EINVAL; } /* strip quotes */ len -= 2; - if (len >= ext->sz) { - pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", - ext->name, value, len, ext->sz - 1); - len = ext->sz - 1; + if (len >= ext->kcfg.sz) { + pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->kcfg.sz - 1); + len = ext->kcfg.sz - 1; } memcpy(ext_val, value + 1, len); ext_val[len] = '\0'; @@ -1507,11 +1526,11 @@ static int parse_u64(const char *value, __u64 *res) return 0; } -static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v) { - int bit_sz = ext->sz * 8; + int bit_sz = ext->kcfg.sz * 8; - if (ext->sz == 8) + if (ext->kcfg.sz == 8) return true; /* Validate that value stored in u64 fits in integer of `ext->sz` @@ -1526,26 +1545,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) * For unsigned target integer, check that all the (64 - Y) bits are * zero. */ - if (ext->is_signed) + if (ext->kcfg.is_signed) return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); else return (v >> bit_sz) == 0; } -static int set_ext_value_num(struct extern_desc *ext, void *ext_val, - __u64 value) +static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) { - if (ext->type != EXT_INT && ext->type != EXT_CHAR) { - pr_warn("extern %s=%llu should be integer\n", + if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) { + pr_warn("extern (kcfg) %s=%llu should be integer\n", ext->name, (unsigned long long)value); return -EINVAL; } - if (!is_ext_value_in_range(ext, value)) { - pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", - ext->name, (unsigned long long)value, ext->sz); + if (!is_kcfg_value_in_range(ext, value)) { + pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->kcfg.sz); return -ERANGE; } - switch (ext->sz) { + switch (ext->kcfg.sz) { case 1: *(__u8 *)ext_val = value; break; case 2: *(__u16 *)ext_val = value; break; case 4: *(__u32 *)ext_val = value; break; @@ -1591,30 +1610,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj, if (!ext || ext->is_set) return 0; - ext_val = data + ext->data_off; + ext_val = data + ext->kcfg.data_off; value = sep + 1; switch (*value) { case 'y': case 'n': case 'm': - err = set_ext_value_tri(ext, ext_val, *value); + err = set_kcfg_value_tri(ext, ext_val, *value); break; case '"': - err = set_ext_value_str(ext, ext_val, value); + err = set_kcfg_value_str(ext, ext_val, value); break; default: /* assume integer */ err = parse_u64(value, &num); if (err) { - pr_warn("extern %s=%s should be integer\n", + pr_warn("extern (kcfg) %s=%s should be integer\n", ext->name, value); return err; } - err = set_ext_value_num(ext, ext_val, num); + err = set_kcfg_value_num(ext, ext_val, num); break; } if (err) return err; - pr_debug("extern %s=%s\n", ext->name, value); + pr_debug("extern (kcfg) %s=%s\n", ext->name, value); return 0; } @@ -1685,16 +1704,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj, static int bpf_object__init_kconfig_map(struct bpf_object *obj) { - struct extern_desc *last_ext; + struct extern_desc *last_ext = NULL, *ext; size_t map_sz; - int err; + int i, err; - if (obj->nr_extern == 0) - return 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type == EXT_KCFG) + last_ext = ext; + } - last_ext = &obj->externs[obj->nr_extern - 1]; - map_sz = last_ext->data_off + last_ext->sz; + if (!last_ext) + return 0; + map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz; err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, obj->efile.symbols_shndx, NULL, map_sz); @@ -1957,6 +1980,10 @@ static int parse_btf_map_def(struct bpf_object *obj, return -EINVAL; pr_debug("map '%s': found map_flags = %u.\n", map->name, map->def.map_flags); + } else if (strcmp(name, "numa_node") == 0) { + if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node)) + return -EINVAL; + pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node); } else if (strcmp(name, "key_size") == 0) { __u32 sz; @@ -2479,22 +2506,33 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog) static int bpf_object__load_vmlinux_btf(struct bpf_object *obj) { + bool need_vmlinux_btf = false; struct bpf_program *prog; int err; + /* CO-RE relocations need kernel BTF */ + if (obj->btf_ext && obj->btf_ext->field_reloc_info.len) + need_vmlinux_btf = true; + bpf_object__for_each_program(prog, obj) { + if (!prog->load) + continue; if (libbpf_prog_needs_vmlinux_btf(prog)) { - obj->btf_vmlinux = libbpf_find_kernel_btf(); - if (IS_ERR(obj->btf_vmlinux)) { - err = PTR_ERR(obj->btf_vmlinux); - pr_warn("Error loading vmlinux BTF: %d\n", err); - obj->btf_vmlinux = NULL; - return err; - } - return 0; + need_vmlinux_btf = true; + break; } } + if (!need_vmlinux_btf) + return 0; + + obj->btf_vmlinux = libbpf_find_kernel_btf(); + if (IS_ERR(obj->btf_vmlinux)) { + err = PTR_ERR(obj->btf_vmlinux); + pr_warn("Error loading vmlinux BTF: %d\n", err); + obj->btf_vmlinux = NULL; + return err; + } return 0; } @@ -2709,8 +2747,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name) return -ENOENT; } -static enum extern_type find_extern_type(const struct btf *btf, int id, - bool *is_signed) +static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) { + const struct btf_var_secinfo *vs; + const struct btf_type *t; + int i, j, n; + + if (!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) + continue; + + vs = btf_var_secinfos(t); + for (j = 0; j < btf_vlen(t); j++, vs++) { + if (vs->type == ext_btf_id) + return i; + } + } + + return -ENOENT; +} + +static enum kcfg_type find_kcfg_type(const struct btf *btf, int id, + bool *is_signed) { const struct btf_type *t; const char *name; @@ -2725,29 +2788,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id, int enc = btf_int_encoding(t); if (enc & BTF_INT_BOOL) - return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN; if (is_signed) *is_signed = enc & BTF_INT_SIGNED; if (t->size == 1) - return EXT_CHAR; + return KCFG_CHAR; if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) - return EXT_UNKNOWN; - return EXT_INT; + return KCFG_UNKNOWN; + return KCFG_INT; } case BTF_KIND_ENUM: if (t->size != 4) - return EXT_UNKNOWN; + return KCFG_UNKNOWN; if (strcmp(name, "libbpf_tristate")) - return EXT_UNKNOWN; - return EXT_TRISTATE; + return KCFG_UNKNOWN; + return KCFG_TRISTATE; case BTF_KIND_ARRAY: if (btf_array(t)->nelems == 0) - return EXT_UNKNOWN; - if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) - return EXT_UNKNOWN; - return EXT_CHAR_ARR; + return KCFG_UNKNOWN; + if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR) + return KCFG_UNKNOWN; + return KCFG_CHAR_ARR; default: - return EXT_UNKNOWN; + return KCFG_UNKNOWN; } } @@ -2756,23 +2819,45 @@ static int cmp_externs(const void *_a, const void *_b) const struct extern_desc *a = _a; const struct extern_desc *b = _b; - /* descending order by alignment requirements */ - if (a->align != b->align) - return a->align > b->align ? -1 : 1; - /* ascending order by size, within same alignment class */ - if (a->sz != b->sz) - return a->sz < b->sz ? -1 : 1; + if (a->type != b->type) + return a->type < b->type ? -1 : 1; + + if (a->type == EXT_KCFG) { + /* descending order by alignment requirements */ + if (a->kcfg.align != b->kcfg.align) + return a->kcfg.align > b->kcfg.align ? -1 : 1; + /* ascending order by size, within same alignment class */ + if (a->kcfg.sz != b->kcfg.sz) + return a->kcfg.sz < b->kcfg.sz ? -1 : 1; + } + /* resolve ties by name */ return strcmp(a->name, b->name); } +static int find_int_btf_id(const struct btf *btf) +{ + const struct btf_type *t; + int i, n; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (btf_is_int(t) && btf_int_bits(t) == 32) + return i; + } + + return 0; +} + static int bpf_object__collect_externs(struct bpf_object *obj) { + struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL; const struct btf_type *t; struct extern_desc *ext; - int i, n, off, btf_id; - struct btf_type *sec; - const char *ext_name; + int i, n, off; + const char *ext_name, *sec_name; Elf_Scn *scn; GElf_Shdr sh; @@ -2818,22 +2903,50 @@ static int bpf_object__collect_externs(struct bpf_object *obj) ext->name = btf__name_by_offset(obj->btf, t->name_off); ext->sym_idx = i; ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; - ext->sz = btf__resolve_size(obj->btf, t->type); - if (ext->sz <= 0) { - pr_warn("failed to resolve size of extern '%s': %d\n", - ext_name, ext->sz); - return ext->sz; - } - ext->align = btf__align_of(obj->btf, t->type); - if (ext->align <= 0) { - pr_warn("failed to determine alignment of extern '%s': %d\n", - ext_name, ext->align); - return -EINVAL; - } - ext->type = find_extern_type(obj->btf, t->type, - &ext->is_signed); - if (ext->type == EXT_UNKNOWN) { - pr_warn("extern '%s' type is unsupported\n", ext_name); + + ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id); + if (ext->sec_btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n", + ext_name, ext->btf_id, ext->sec_btf_id); + return ext->sec_btf_id; + } + sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id); + sec_name = btf__name_by_offset(obj->btf, sec->name_off); + + if (strcmp(sec_name, KCONFIG_SEC) == 0) { + kcfg_sec = sec; + ext->type = EXT_KCFG; + ext->kcfg.sz = btf__resolve_size(obj->btf, t->type); + if (ext->kcfg.sz <= 0) { + pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n", + ext_name, ext->kcfg.sz); + return ext->kcfg.sz; + } + ext->kcfg.align = btf__align_of(obj->btf, t->type); + if (ext->kcfg.align <= 0) { + pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n", + ext_name, ext->kcfg.align); + return -EINVAL; + } + ext->kcfg.type = find_kcfg_type(obj->btf, t->type, + &ext->kcfg.is_signed); + if (ext->kcfg.type == KCFG_UNKNOWN) { + pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } else if (strcmp(sec_name, KSYMS_SEC) == 0) { + const struct btf_type *vt; + + ksym_sec = sec; + ext->type = EXT_KSYM; + + vt = skip_mods_and_typedefs(obj->btf, t->type, NULL); + if (!btf_is_void(vt)) { + pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name); + return -ENOTSUP; + } + } else { + pr_warn("unrecognized extern section '%s'\n", sec_name); return -ENOTSUP; } } @@ -2842,42 +2955,80 @@ static int bpf_object__collect_externs(struct bpf_object *obj) if (!obj->nr_extern) return 0; - /* sort externs by (alignment, size, name) and calculate their offsets - * within a map */ + /* sort externs by type, for kcfg ones also by (align, size, name) */ qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); - off = 0; - for (i = 0; i < obj->nr_extern; i++) { - ext = &obj->externs[i]; - ext->data_off = roundup(off, ext->align); - off = ext->data_off + ext->sz; - pr_debug("extern #%d: symbol %d, off %u, name %s\n", - i, ext->sym_idx, ext->data_off, ext->name); - } - btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); - if (btf_id <= 0) { - pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); - return -ESRCH; - } + /* for .ksyms section, we need to turn all externs into allocated + * variables in BTF to pass kernel verification; we do this by + * pretending that each extern is a 8-byte variable + */ + if (ksym_sec) { + /* find existing 4-byte integer type in BTF to use for fake + * extern variables in DATASEC + */ + int int_btf_id = find_int_btf_id(obj->btf); - sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); - sec->size = off; - n = btf_vlen(sec); - for (i = 0; i < n; i++) { - struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; - - t = btf__type_by_id(obj->btf, vs->type); - ext_name = btf__name_by_offset(obj->btf, t->name_off); - ext = find_extern_by_name(obj, ext_name); - if (!ext) { - pr_warn("failed to find extern definition for BTF var '%s'\n", - ext_name); - return -ESRCH; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type != EXT_KSYM) + continue; + pr_debug("extern (ksym) #%d: symbol %d, name %s\n", + i, ext->sym_idx, ext->name); + } + + sec = ksym_sec; + n = btf_vlen(sec); + for (i = 0, off = 0; i < n; i++, off += sizeof(int)) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + struct btf_type *vt; + + vt = (void *)btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, vt->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + vt->type = int_btf_id; + vs->offset = off; + vs->size = sizeof(int); } - vs->offset = ext->data_off; - btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + sec->size = off; } + if (kcfg_sec) { + sec = kcfg_sec; + /* for kcfg externs calculate their offsets within a .kconfig map */ + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (ext->type != EXT_KCFG) + continue; + + ext->kcfg.data_off = roundup(off, ext->kcfg.align); + off = ext->kcfg.data_off + ext->kcfg.sz; + pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->kcfg.data_off, ext->name); + } + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + vs->offset = ext->kcfg.data_off; + } + } return 0; } @@ -3007,11 +3158,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog, sym_idx); return -LIBBPF_ERRNO__RELOC; } - pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", - i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + pr_debug("found extern #%d '%s' (sym %d) for insn %u\n", + i, ext->name, ext->sym_idx, insn_idx); reloc_desc->type = RELO_EXTERN; reloc_desc->insn_idx = insn_idx; - reloc_desc->sym_off = ext->data_off; + reloc_desc->sym_off = i; /* sym_off stores extern index */ return 0; } @@ -3222,20 +3373,27 @@ err_free_new_name: return err; } -int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +__u32 bpf_map__max_entries(const struct bpf_map *map) { - if (!map || !max_entries) - return -EINVAL; + return map->def.max_entries; +} - /* If map already created, its attributes can't be changed. */ +int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) +{ if (map->fd >= 0) return -EBUSY; - map->def.max_entries = max_entries; - return 0; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + return bpf_map__set_max_entries(map, max_entries); +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -3603,6 +3761,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map) create_attr.map_flags = def->map_flags; create_attr.key_size = def->key_size; create_attr.value_size = def->value_size; + create_attr.numa_node = map->numa_node; if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) { int nr_cpus; @@ -4799,8 +4958,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) if (targ_btf_path) targ_btf = btf__parse_elf(targ_btf_path, NULL); else - targ_btf = libbpf_find_kernel_btf(); - if (IS_ERR(targ_btf)) { + targ_btf = obj->btf_vmlinux; + if (IS_ERR_OR_NULL(targ_btf)) { pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf)); return PTR_ERR(targ_btf); } @@ -4847,7 +5006,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path) } out: - btf__free(targ_btf); + /* obj->btf_vmlinux is freed at the end of object load phase */ + if (targ_btf != obj->btf_vmlinux) + btf__free(targ_btf); if (!IS_ERR_OR_NULL(cand_cache)) { hashmap__for_each_entry(cand_cache, entry, i) { bpf_core_free_cands(entry->value); @@ -4934,6 +5095,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + struct extern_desc *ext; if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { pr_warn("relocation out of range: '%s'\n", @@ -4952,9 +5114,15 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) insn[0].imm = obj->maps[relo->map_idx].fd; break; case RELO_EXTERN: - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; - insn[1].imm = relo->sym_off; + ext = &obj->externs[relo->sym_off]; + if (ext->type == EXT_KCFG) { + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = ext->kcfg.data_off; + } else /* EXT_KSYM */ { + insn[0].imm = (__u32)ext->ksym.addr; + insn[1].imm = ext->ksym.addr >> 32; + } break; case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); @@ -5287,6 +5455,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { int err = 0, fd, i, btf_id; + if (prog->obj->loaded) { + pr_warn("prog '%s'('%s'): can't load after object was loaded\n", + prog->name, prog->section_name); + return -EINVAL; + } + if ((prog->type == BPF_PROG_TYPE_TRACING || prog->type == BPF_PROG_TYPE_LSM || prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) { @@ -5375,16 +5549,21 @@ static bool bpf_program__is_function_storage(const struct bpf_program *prog, static int bpf_object__load_progs(struct bpf_object *obj, int log_level) { + struct bpf_program *prog; size_t i; int err; for (i = 0; i < obj->nr_programs; i++) { - if (bpf_program__is_function_storage(&obj->programs[i], obj)) + prog = &obj->programs[i]; + if (bpf_program__is_function_storage(prog, obj)) continue; - obj->programs[i].log_level |= log_level; - err = bpf_program__load(&obj->programs[i], - obj->license, - obj->kern_version); + if (!prog->load) { + pr_debug("prog '%s'('%s'): skipped loading\n", + prog->name, prog->section_name); + continue; + } + prog->log_level |= log_level; + err = bpf_program__load(prog, obj->license, obj->kern_version); if (err) return err; } @@ -5573,56 +5752,114 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj) return 0; } +static int bpf_object__read_kallsyms_file(struct bpf_object *obj) +{ + char sym_type, sym_name[500]; + unsigned long long sym_addr; + struct extern_desc *ext; + int ret, err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) { + err = -errno; + pr_warn("failed to open /proc/kallsyms: %d\n", err); + return err; + } + + while (true) { + ret = fscanf(f, "%llx %c %499s%*[^\n]\n", + &sym_addr, &sym_type, sym_name); + if (ret == EOF && feof(f)) + break; + if (ret != 3) { + pr_warn("failed to read kallsyms entry: %d\n", ret); + err = -EINVAL; + goto out; + } + + ext = find_extern_by_name(obj, sym_name); + if (!ext || ext->type != EXT_KSYM) + continue; + + if (ext->is_set && ext->ksym.addr != sym_addr) { + pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n", + sym_name, ext->ksym.addr, sym_addr); + err = -EINVAL; + goto out; + } + if (!ext->is_set) { + ext->is_set = true; + ext->ksym.addr = sym_addr; + pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr); + } + } + +out: + fclose(f); + return err; +} + static int bpf_object__resolve_externs(struct bpf_object *obj, const char *extra_kconfig) { - bool need_config = false; + bool need_config = false, need_kallsyms = false; struct extern_desc *ext; + void *kcfg_data = NULL; int err, i; - void *data; if (obj->nr_extern == 0) return 0; - data = obj->maps[obj->kconfig_map_idx].mmaped; + if (obj->kconfig_map_idx >= 0) + kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped; for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { - void *ext_val = data + ext->data_off; + if (ext->type == EXT_KCFG && + strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = kcfg_data + ext->kcfg.data_off; __u32 kver = get_kernel_version(); if (!kver) { pr_warn("failed to get kernel version\n"); return -EINVAL; } - err = set_ext_value_num(ext, ext_val, kver); + err = set_kcfg_value_num(ext, ext_val, kver); if (err) return err; - pr_debug("extern %s=0x%x\n", ext->name, kver); - } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver); + } else if (ext->type == EXT_KCFG && + strncmp(ext->name, "CONFIG_", 7) == 0) { need_config = true; + } else if (ext->type == EXT_KSYM) { + need_kallsyms = true; } else { pr_warn("unrecognized extern '%s'\n", ext->name); return -EINVAL; } } if (need_config && extra_kconfig) { - err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data); if (err) return -EINVAL; need_config = false; for (i = 0; i < obj->nr_extern; i++) { ext = &obj->externs[i]; - if (!ext->is_set) { + if (ext->type == EXT_KCFG && !ext->is_set) { need_config = true; break; } } } if (need_config) { - err = bpf_object__read_kconfig_file(obj, data); + err = bpf_object__read_kconfig_file(obj, kcfg_data); + if (err) + return -EINVAL; + } + if (need_kallsyms) { + err = bpf_object__read_kallsyms_file(obj); if (err) return -EINVAL; } @@ -5653,12 +5890,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) return -EINVAL; if (obj->loaded) { - pr_warn("object should not be loaded twice\n"); + pr_warn("object '%s': load can't be attempted twice\n", obj->name); return -EINVAL; } - obj->loaded = true; - err = bpf_object__probe_loading(obj); err = err ? : bpf_object__probe_caps(obj); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); @@ -5673,6 +5908,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) btf__free(obj->btf_vmlinux); obj->btf_vmlinux = NULL; + obj->loaded = true; /* doesn't matter if successfully or not */ + if (err) goto out; @@ -6445,6 +6682,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) return title; } +bool bpf_program__autoload(const struct bpf_program *prog) +{ + return prog->load; +} + +int bpf_program__set_autoload(struct bpf_program *prog, bool autoload) +{ + if (prog->obj->loaded) + return -EINVAL; + + prog->load = autoload; + return 0; +} + int bpf_program__fd(const struct bpf_program *prog) { return bpf_program__nth_fd(prog, 0); @@ -7094,6 +7345,71 @@ const char *bpf_map__name(const struct bpf_map *map) return map ? map->name : NULL; } +enum bpf_map_type bpf_map__type(const struct bpf_map *map) +{ + return map->def.type; +} + +int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.type = type; + return 0; +} + +__u32 bpf_map__map_flags(const struct bpf_map *map) +{ + return map->def.map_flags; +} + +int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.map_flags = flags; + return 0; +} + +__u32 bpf_map__numa_node(const struct bpf_map *map) +{ + return map->numa_node; +} + +int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node) +{ + if (map->fd >= 0) + return -EBUSY; + map->numa_node = numa_node; + return 0; +} + +__u32 bpf_map__key_size(const struct bpf_map *map) +{ + return map->def.key_size; +} + +int bpf_map__set_key_size(struct bpf_map *map, __u32 size) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.key_size = size; + return 0; +} + +__u32 bpf_map__value_size(const struct bpf_map *map) +{ + return map->def.value_size; +} + +int bpf_map__set_value_size(struct bpf_map *map, __u32 size) +{ + if (map->fd >= 0) + return -EBUSY; + map->def.value_size = size; + return 0; +} + __u32 bpf_map__btf_key_type_id(const struct bpf_map *map) { return map ? map->btf_key_type_id : 0; @@ -7146,9 +7462,17 @@ bool bpf_map__is_internal(const struct bpf_map *map) return map->libbpf_type != LIBBPF_MAP_UNSPEC; } -void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +__u32 bpf_map__ifindex(const struct bpf_map *map) { + return map->map_ifindex; +} + +int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex) +{ + if (map->fd >= 0) + return -EBUSY; map->map_ifindex = ifindex; + return 0; } int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd) @@ -8994,6 +9318,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) const struct bpf_sec_def *sec_def; const char *sec_name = bpf_program__title(prog, false); + if (!prog->load) + continue; + sec_def = find_sec_def(sec_name); if (!sec_def || !sec_def->attach_fn) continue; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 334437af3014..2335971ed0bd 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -200,6 +200,8 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); +LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload); /* returns program size in bytes */ LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); @@ -418,11 +420,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj); LIBBPF_API struct bpf_map * bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj); +/* get/set map FD */ LIBBPF_API int bpf_map__fd(const struct bpf_map *map); +LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +/* get map definition */ LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map); +/* get map name */ LIBBPF_API const char *bpf_map__name(const struct bpf_map *map); +/* get/set map type */ +LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type); +/* get/set map size (max_entries) */ +LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); +/* get/set map flags */ +LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags); +/* get/set map NUMA node */ +LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node); +/* get/set map key size */ +LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size); +/* get/set map value size */ +LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size); +/* get map key/value BTF type IDs */ LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map); LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map); +/* get/set map if_index */ +LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map); +LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *); LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, @@ -430,11 +459,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, LIBBPF_API void *bpf_map__priv(const struct bpf_map *map); LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map, const void *data, size_t size); -LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); -LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map); -LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map); diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index f732c77b7ed0..6544d2cd1ed6 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -270,3 +270,22 @@ LIBBPF_0.0.9 { ring_buffer__new; ring_buffer__poll; } LIBBPF_0.0.8; + +LIBBPF_0.1.0 { + global: + bpf_map__ifindex; + bpf_map__key_size; + bpf_map__map_flags; + bpf_map__max_entries; + bpf_map__numa_node; + bpf_map__set_key_size; + bpf_map__set_map_flags; + bpf_map__set_max_entries; + bpf_map__set_numa_node; + bpf_map__set_type; + bpf_map__set_value_size; + bpf_map__type; + bpf_map__value_size; + bpf_program__autoload; + bpf_program__set_autoload; +} LIBBPF_0.0.9; diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 22aaec74ea0a..1f9c696b3edf 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -134,12 +134,12 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ) $(call msg,CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< -VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \ +VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \ ../../../../vmlinux \ /sys/kernel/btf/vmlinux \ /boot/vmlinux-$(shell uname -r) -VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) +VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS)))) $(OUTPUT)/runqslower: $(BPFOBJ) $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \ @@ -182,8 +182,13 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR): mkdir -p $@ $(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR) +ifeq ($(VMLINUX_H),) $(call msg,GEN,,$@) $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@ +else + $(call msg,CP,,$@) + cp "$(VMLINUX_H)" $@ +endif # Get Clang's default includes on this system, as opposed to those seen by # '-target bpf'. This fixes "missing" files on some architectures/distros, diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index e36dd1a1780d..acd08715be2e 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -7,8 +7,6 @@ #include <arpa/inet.h> -#include <sys/epoll.h> - #include <linux/err.h> #include <linux/in.h> #include <linux/in6.h> @@ -17,8 +15,13 @@ #include "network_helpers.h" #define clean_errno() (errno == 0 ? "None" : strerror(errno)) -#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ - __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__) +#define log_err(MSG, ...) ({ \ + int __save = errno; \ + fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \ + __FILE__, __LINE__, clean_errno(), \ + ##__VA_ARGS__); \ + errno = __save; \ +}) struct ipv4_packet pkt_v4 = { .eth.h_proto = __bpf_constant_htons(ETH_P_IP), @@ -37,7 +40,34 @@ struct ipv6_packet pkt_v6 = { .tcp.doff = 5, }; -int start_server_with_port(int family, int type, __u16 port) +static int settimeo(int fd, int timeout_ms) +{ + struct timeval timeout = { .tv_sec = 3 }; + + if (timeout_ms > 0) { + timeout.tv_sec = timeout_ms / 1000; + timeout.tv_usec = (timeout_ms % 1000) * 1000; + } + + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout, + sizeof(timeout))) { + log_err("Failed to set SO_RCVTIMEO"); + return -1; + } + + if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout, + sizeof(timeout))) { + log_err("Failed to set SO_SNDTIMEO"); + return -1; + } + + return 0; +} + +#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; }) + +int start_server(int family, int type, const char *addr_str, __u16 port, + int timeout_ms) { struct sockaddr_storage addr = {}; socklen_t len; @@ -48,120 +78,119 @@ int start_server_with_port(int family, int type, __u16 port) sin->sin_family = AF_INET; sin->sin_port = htons(port); + if (addr_str && + inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) { + log_err("inet_pton(AF_INET, %s)", addr_str); + return -1; + } len = sizeof(*sin); } else { struct sockaddr_in6 *sin6 = (void *)&addr; sin6->sin6_family = AF_INET6; sin6->sin6_port = htons(port); + if (addr_str && + inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) { + log_err("inet_pton(AF_INET6, %s)", addr_str); + return -1; + } len = sizeof(*sin6); } - fd = socket(family, type | SOCK_NONBLOCK, 0); + fd = socket(family, type, 0); if (fd < 0) { log_err("Failed to create server socket"); return -1; } + if (settimeo(fd, timeout_ms)) + goto error_close; + if (bind(fd, (const struct sockaddr *)&addr, len) < 0) { log_err("Failed to bind socket"); - close(fd); - return -1; + goto error_close; } if (type == SOCK_STREAM) { if (listen(fd, 1) < 0) { log_err("Failed to listed on socket"); - close(fd); - return -1; + goto error_close; } } return fd; -} -int start_server(int family, int type) -{ - return start_server_with_port(family, type, 0); +error_close: + save_errno_close(fd); + return -1; } -static const struct timeval timeo_sec = { .tv_sec = 3 }; -static const size_t timeo_optlen = sizeof(timeo_sec); - -int connect_to_fd(int family, int type, int server_fd) +static int connect_fd_to_addr(int fd, + const struct sockaddr_storage *addr, + socklen_t addrlen) { - int fd, save_errno; - - fd = socket(family, type, 0); - if (fd < 0) { - log_err("Failed to create client socket"); + if (connect(fd, (const struct sockaddr *)addr, addrlen)) { + log_err("Failed to connect to server"); return -1; } - if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) { - save_errno = errno; - close(fd); - errno = save_errno; - return -1; - } - - return fd; + return 0; } -int connect_fd_to_fd(int client_fd, int server_fd) +int connect_to_fd(int server_fd, int timeout_ms) { struct sockaddr_storage addr; - socklen_t len = sizeof(addr); - int save_errno; + struct sockaddr_in *addr_in; + socklen_t addrlen, optlen; + int fd, type; - if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, - timeo_optlen)) { - log_err("Failed to set SO_RCVTIMEO"); + optlen = sizeof(type); + if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) { + log_err("getsockopt(SOL_TYPE)"); return -1; } - if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + addrlen = sizeof(addr); + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { log_err("Failed to get server addr"); return -1; } - if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) { - if (errno != EINPROGRESS) { - save_errno = errno; - log_err("Failed to connect to server"); - errno = save_errno; - } + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, type, 0); + if (fd < 0) { + log_err("Failed to create client socket"); return -1; } - return 0; + if (settimeo(fd, timeout_ms)) + goto error_close; + + if (connect_fd_to_addr(fd, &addr, addrlen)) + goto error_close; + + return fd; + +error_close: + save_errno_close(fd); + return -1; } -int connect_wait(int fd) +int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms) { - struct epoll_event ev = {}, events[2]; - int timeout_ms = 1000; - int efd, nfd; + struct sockaddr_storage addr; + socklen_t len = sizeof(addr); - efd = epoll_create1(EPOLL_CLOEXEC); - if (efd < 0) { - log_err("Failed to open epoll fd"); + if (settimeo(client_fd, timeout_ms)) return -1; - } - - ev.events = EPOLLRDHUP | EPOLLOUT; - ev.data.fd = fd; - if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) { - log_err("Failed to register fd=%d on epoll fd=%d", fd, efd); - close(efd); + if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) { + log_err("Failed to get server addr"); return -1; } - nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms); - if (nfd < 0) - log_err("Failed to wait for I/O event on epoll fd=%d", efd); + if (connect_fd_to_addr(client_fd, &addr, len)) + return -1; - close(efd); - return nfd; + return 0; } diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index 6a8009605670..f580e82fda58 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -33,10 +33,9 @@ struct ipv6_packet { } __packed; extern struct ipv6_packet pkt_v6; -int start_server(int family, int type); -int start_server_with_port(int family, int type, __u16 port); -int connect_to_fd(int family, int type, int server_fd); -int connect_fd_to_fd(int client_fd, int server_fd); -int connect_wait(int client_fd); +int start_server(int family, int type, const char *addr, __u16 port, + int timeout_ms); +int connect_to_fd(int server_fd, int timeout_ms); +int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); #endif diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c new file mode 100644 index 000000000000..3693f7d133eb --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/autoload.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_autoload.skel.h" + +void test_autoload(void) +{ + int duration = 0, err; + struct test_autoload* skel; + + skel = test_autoload__open_and_load(); + /* prog3 should be broken */ + if (CHECK(skel, "skel_open_and_load", "unexpected success\n")) + goto cleanup; + + skel = test_autoload__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + goto cleanup; + + /* don't load prog3 */ + bpf_program__set_autoload(skel->progs.prog3, false); + + err = test_autoload__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + err = test_autoload__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(!skel->bss->prog1_called, "prog1", "not called\n"); + CHECK(!skel->bss->prog2_called, "prog2", "not called\n"); + CHECK(skel->bss->prog3_called, "prog3", "called?!\n"); + +cleanup: + test_autoload__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 87c29dde1cf9..fed42755416d 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -5,7 +5,12 @@ #include "bpf_iter_netlink.skel.h" #include "bpf_iter_bpf_map.skel.h" #include "bpf_iter_task.skel.h" +#include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_tcp4.skel.h" +#include "bpf_iter_tcp6.skel.h" +#include "bpf_iter_udp4.skel.h" +#include "bpf_iter_udp6.skel.h" #include "bpf_iter_test_kern1.skel.h" #include "bpf_iter_test_kern2.skel.h" #include "bpf_iter_test_kern3.skel.h" @@ -106,6 +111,20 @@ static void test_task(void) bpf_iter_task__destroy(skel); } +static void test_task_stack(void) +{ + struct bpf_iter_task_stack *skel; + + skel = bpf_iter_task_stack__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_stack__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_task_stack); + + bpf_iter_task_stack__destroy(skel); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; @@ -120,6 +139,62 @@ static void test_task_file(void) bpf_iter_task_file__destroy(skel); } +static void test_tcp4(void) +{ + struct bpf_iter_tcp4 *skel; + + skel = bpf_iter_tcp4__open_and_load(); + if (CHECK(!skel, "bpf_iter_tcp4__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_tcp4); + + bpf_iter_tcp4__destroy(skel); +} + +static void test_tcp6(void) +{ + struct bpf_iter_tcp6 *skel; + + skel = bpf_iter_tcp6__open_and_load(); + if (CHECK(!skel, "bpf_iter_tcp6__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_tcp6); + + bpf_iter_tcp6__destroy(skel); +} + +static void test_udp4(void) +{ + struct bpf_iter_udp4 *skel; + + skel = bpf_iter_udp4__open_and_load(); + if (CHECK(!skel, "bpf_iter_udp4__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_udp4); + + bpf_iter_udp4__destroy(skel); +} + +static void test_udp6(void) +{ + struct bpf_iter_udp6 *skel; + + skel = bpf_iter_udp6__open_and_load(); + if (CHECK(!skel, "bpf_iter_udp6__open_and_load", + "skeleton open_and_load failed\n")) + return; + + do_dummy_read(skel->progs.dump_udp6); + + bpf_iter_udp6__destroy(skel); +} + /* The expected string is less than 16 bytes */ static int do_read_with_fd(int iter_fd, const char *expected, bool read_one_char) @@ -392,8 +467,18 @@ void test_bpf_iter(void) test_bpf_map(); if (test__start_subtest("task")) test_task(); + if (test__start_subtest("task_stack")) + test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("tcp4")) + test_tcp4(); + if (test__start_subtest("tcp6")) + test_tcp6(); + if (test__start_subtest("udp4")) + test_udp4(); + if (test__start_subtest("udp6")) + test_udp6(); if (test__start_subtest("anon")) test_anon_iter(false); if (test__start_subtest("anon-read-one-char")) diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c index 059047af7df3..464edc1c1708 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c @@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk) socklen_t addr_len = sizeof(addr); __u32 duration = 0; - serv_sk = start_server(AF_INET6, SOCK_STREAM); + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) return; @@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk) *g_serv_port = addr.sin6_port; /* Client outside of test cgroup should fail to connect by timeout. */ - err = connect_fd_to_fd(out_sk, serv_sk); + err = connect_fd_to_fd(out_sk, serv_sk, 1000); if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd", "unexpected result err %d errno %d\n", err, errno)) goto cleanup; - err = connect_wait(out_sk); - if (CHECK(err, "connect_wait", "unexpected result %d\n", err)) - goto cleanup; - /* Client inside test cgroup should connect just fine. */ - in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk); + in_sk = connect_to_fd(serv_sk, 0); if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno)) goto cleanup; @@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void) * differs from that of testing cgroup. Moving selftests process to * testing cgroup won't change cgroup id of an already created socket. */ - out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); + out_sk = socket(AF_INET6, SOCK_STREAM, 0); if (CHECK_FAIL(out_sk < 0)) return; diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c index 17bbf76812ca..9229db2f5ca5 100644 --- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c +++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c @@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type) goto close_bpf_object; } - fd = connect_to_fd(family, type, server_fd); + fd = connect_to_fd(server_fd, 0); if (fd < 0) { err = -1; goto close_bpf_object; @@ -137,25 +137,25 @@ void test_connect_force_port(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124); + server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123); + server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM)); close(server_fd); - server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124); + server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM)); diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c new file mode 100644 index 000000000000..1a11612ace6c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/endian.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include "test_endian.skel.h" + +static int duration; + +#define IN16 0x1234 +#define IN32 0x12345678U +#define IN64 0x123456789abcdef0ULL + +#define OUT16 0x3412 +#define OUT32 0x78563412U +#define OUT64 0xf0debc9a78563412ULL + +void test_endian(void) +{ + struct test_endian* skel; + struct test_endian__bss *bss; + int err; + + skel = test_endian__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + + bss->in16 = IN16; + bss->in32 = IN32; + bss->in64 = IN64; + + err = test_endian__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out16, (__u64)OUT16); + CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out32, (__u64)OUT32); + CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->out64, (__u64)OUT64); + + CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const16, (__u64)OUT16); + CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const32, (__u64)OUT32); + CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n", + (__u64)bss->const64, (__u64)OUT64); +cleanup: + test_endian__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c new file mode 100644 index 000000000000..e3d6777226a8 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> +#include "test_ksyms.skel.h" +#include <sys/stat.h> + +static int duration; + +static __u64 kallsyms_find(const char *sym) +{ + char type, name[500]; + __u64 addr, res = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) + return 0; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + res = addr; + goto out; + } + } + + CHECK(false, "not_found", "symbol %s not found\n", sym); +out: + fclose(f); + return res; +} + +void test_ksyms(void) +{ + __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); + const char *btf_path = "/sys/kernel/btf/vmlinux"; + struct test_ksyms *skel; + struct test_ksyms__data *data; + struct stat st; + __u64 btf_size; + int err; + + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) + return; + btf_size = st.st_size; + + skel = test_ksyms__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + return; + + err = test_ksyms__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops", + "got 0x%llx, exp 0x%llx\n", + data->out__bpf_link_fops, link_fops_addr); + CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1", + "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); + CHECK(data->out__btf_size != btf_size, "btf_size", + "got %llu, exp %llu\n", data->out__btf_size, btf_size); + CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + +cleanup: + test_ksyms__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c index c1168e4a9036..5a2a689dbb68 100644 --- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c +++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c @@ -23,7 +23,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; @@ -49,7 +49,7 @@ void test_load_bytes_relative(void) if (CHECK_FAIL(err)) goto close_bpf_object; - client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + client_fd = connect_to_fd(server_fd, 0); if (CHECK_FAIL(client_fd < 0)) goto close_bpf_object; close(client_fd); diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c new file mode 100644 index 000000000000..c230a573c373 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <test_progs.h> +#include <network_helpers.h> + +#include "map_ptr_kern.skel.h" + +void test_map_ptr(void) +{ + struct map_ptr_kern *skel; + __u32 duration = 0, retval; + char buf[128]; + int err; + + skel = map_ptr_kern__open_and_load(); + if (CHECK(!skel, "skel_open_load", "open_load failed\n")) + return; + + err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4, + sizeof(pkt_v4), buf, NULL, &retval, NULL); + + if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno)) + goto cleanup; + + if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval, + skel->bss->g_map_type, skel->bss->g_line)) + goto cleanup; + +cleanup: + map_ptr_kern__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c index 9013a0c01eed..d207e968e6b1 100644 --- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c +++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c @@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd) goto close_bpf_object; } - client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd); + client_fd = connect_to_fd(server_fd, 0); if (client_fd < 0) { err = -1; goto close_bpf_object; @@ -161,7 +161,7 @@ void test_tcp_rtt(void) if (CHECK_FAIL(cgroup_fd < 0)) return; - server_fd = start_server(AF_INET, SOCK_STREAM); + server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0); if (CHECK_FAIL(server_fd < 0)) goto close_cgroup_fd; diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c new file mode 100644 index 000000000000..c75525eab02c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/varlen.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <time.h> +#include "test_varlen.skel.h" + +#define CHECK_VAL(got, exp) \ + CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \ + (long)(got), (long)(exp)) + +void test_varlen(void) +{ + int duration = 0, err; + struct test_varlen* skel; + struct test_varlen__bss *bss; + struct test_varlen__data *data; + const char str1[] = "Hello, "; + const char str2[] = "World!"; + const char exp_str[] = "Hello, \0World!\0"; + const int size1 = sizeof(str1); + const int size2 = sizeof(str2); + + skel = test_varlen__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + bss = skel->bss; + data = skel->data; + + err = test_varlen__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + bss->test_pid = getpid(); + + /* trigger everything */ + memcpy(bss->buf_in1, str1, size1); + memcpy(bss->buf_in2, str2, size2); + bss->capture = true; + usleep(1); + bss->capture = false; + + CHECK_VAL(bss->payload1_len1, size1); + CHECK_VAL(bss->payload1_len2, size2); + CHECK_VAL(bss->total1, size1 + size2); + CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload2_len1, size1); + CHECK_VAL(data->payload2_len2, size2); + CHECK_VAL(data->total2, size1 + size2); + CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload3_len1, size1); + CHECK_VAL(data->payload3_len2, size2); + CHECK_VAL(data->total3, size1 + size2); + CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check", + "doesn't match!"); + + CHECK_VAL(data->payload4_len1, size1); + CHECK_VAL(data->payload4_len2, size2); + CHECK_VAL(data->total4, size1 + size2); + CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check", + "doesn't match!"); +cleanup: + test_varlen__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h new file mode 100644 index 000000000000..17db3bac518b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define bpf_iter_meta bpf_iter_meta___not_used +#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used +#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used +#define bpf_iter__netlink bpf_iter__netlink___not_used +#define bpf_iter__task bpf_iter__task___not_used +#define bpf_iter__task_file bpf_iter__task_file___not_used +#define bpf_iter__tcp bpf_iter__tcp___not_used +#define tcp6_sock tcp6_sock___not_used +#define bpf_iter__udp bpf_iter__udp___not_used +#define udp6_sock udp6_sock___not_used +#include "vmlinux.h" +#undef bpf_iter_meta +#undef bpf_iter__bpf_map +#undef bpf_iter__ipv6_route +#undef bpf_iter__netlink +#undef bpf_iter__task +#undef bpf_iter__task_file +#undef bpf_iter__tcp +#undef tcp6_sock +#undef bpf_iter__udp +#undef udp6_sock + +struct bpf_iter_meta { + struct seq_file *seq; + __u64 session_id; + __u64 seq_num; +} __attribute__((preserve_access_index)); + +struct bpf_iter__ipv6_route { + struct bpf_iter_meta *meta; + struct fib6_info *rt; +} __attribute__((preserve_access_index)); + +struct bpf_iter__netlink { + struct bpf_iter_meta *meta; + struct netlink_sock *sk; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task { + struct bpf_iter_meta *meta; + struct task_struct *task; +} __attribute__((preserve_access_index)); + +struct bpf_iter__task_file { + struct bpf_iter_meta *meta; + struct task_struct *task; + __u32 fd; + struct file *file; +} __attribute__((preserve_access_index)); + +struct bpf_iter__bpf_map { + struct bpf_iter_meta *meta; + struct bpf_map *map; +} __attribute__((preserve_access_index)); + +struct bpf_iter__tcp { + struct bpf_iter_meta *meta; + struct sock_common *sk_common; + uid_t uid; +} __attribute__((preserve_access_index)); + +struct tcp6_sock { + struct tcp_sock tcp; + struct ipv6_pinfo inet6; +} __attribute__((preserve_access_index)); + +struct bpf_iter__udp { + struct bpf_iter_meta *meta; + struct udp_sock *udp_sk; + uid_t uid __attribute__((aligned(8))); + int bucket __attribute__((aligned(8))); +} __attribute__((preserve_access_index)); + +struct udp6_sock { + struct udp_sock udp; + struct ipv6_pinfo inet6; +} __attribute__((preserve_access_index)); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c index b57bd6fef208..08651b23edba 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c @@ -1,27 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - SEC("iter/bpf_map") int dump_bpf_map(struct bpf_iter__bpf_map *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c index c8e9ca74c87b..d58d9f1642b5 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c @@ -1,35 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__ipv6_route +#include "bpf_iter.h" +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__ipv6_route { - struct bpf_iter_meta *meta; - struct fib6_info *rt; -} __attribute__((preserve_access_index)); - char _license[] SEC("license") = "GPL"; extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; -#define RTF_GATEWAY 0x0002 -#define IFNAMSIZ 16 -#define fib_nh_gw_family nh_common.nhc_gw_family -#define fib_nh_gw6 nh_common.nhc_gw.ipv6 -#define fib_nh_dev nh_common.nhc_dev - SEC("iter/ipv6_route") int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c index 75ecf956a2df..b9c2756c5c97 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c @@ -1,11 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__netlink bpf_iter__netlink___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__netlink +#include "bpf_iter.h" +#include "bpf_tracing_net.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c index ee754021f98e..4983087852a0 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c @@ -1,27 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 0f0ec3db20ba..8b787baa2654 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -1,29 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task_file bpf_iter__task_file___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task_file +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task_file { - struct bpf_iter_meta *meta; - struct task_struct *task; - __u32 fd; - struct file *file; -} __attribute__((preserve_access_index)); - SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c new file mode 100644 index 000000000000..50e59a2e142e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define MAX_STACK_TRACE_DEPTH 64 +unsigned long entries[MAX_STACK_TRACE_DEPTH] = {}; +#define SIZE_OF_ULONG (sizeof(unsigned long)) + +SEC("iter/task") +int dump_task_stack(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + long i, retlen; + + if (task == (void *)0) + return 0; + + retlen = bpf_get_task_stack(task, entries, + MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0); + if (retlen < 0) + return 0; + + BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid, + retlen / SIZE_OF_ULONG); + for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) { + if (retlen > i * SIZE_OF_ULONG) + BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]); + } + BPF_SEQ_PRINTF(seq, "\n"); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c new file mode 100644 index 000000000000..30fd587cb325 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. + */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 destp, srcp; + __be32 dest, src; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = inet->inet_daddr; + src = inet->inet_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->rcv_nxt - tp->copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, src, srcp, destp, destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->write_seq - tp->snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + __u16 destp, srcp; + __be32 dest, src; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = tw->tw_daddr; + src = tw->tw_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, src, srcp, dest, destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ", + seq_num, irsk->ir_loc_addr, + irsk->ir_num, irsk->ir_rmt_addr, + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp4(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct tcp_request_sock *req; + struct tcp_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "rem_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET) + return 0; + + tp = bpf_skc_to_tcp_sock(sk_common); + if (tp) + return dump_tcp_sock(seq, tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c new file mode 100644 index 000000000000..10dec4392031 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c @@ -0,0 +1,250 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static int hlist_unhashed_lockless(const struct hlist_node *h) +{ + return !(h->pprev); +} + +static int timer_pending(const struct timer_list * timer) +{ + return !hlist_unhashed_lockless(&timer->entry); +} + +extern unsigned CONFIG_HZ __kconfig; + +#define USER_HZ 100 +#define NSEC_PER_SEC 1000000000ULL +static clock_t jiffies_to_clock_t(unsigned long x) +{ + /* The implementation here tailored to a particular + * setting of USER_HZ. + */ + u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ; + u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ; + + if ((tick_nsec % user_hz_nsec) == 0) { + if (CONFIG_HZ < USER_HZ) + return x * (USER_HZ / CONFIG_HZ); + else + return x / (CONFIG_HZ / USER_HZ); + } + return x * tick_nsec/user_hz_nsec; +} + +static clock_t jiffies_delta_to_clock_t(long delta) +{ + if (delta <= 0) + return 0; + + return jiffies_to_clock_t(delta); +} + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +static bool +inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk) +{ + return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH; +} + +static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp) +{ + return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH; +} + +static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp, + uid_t uid, __u32 seq_num) +{ + const struct inet_connection_sock *icsk; + const struct fastopen_queue *fastopenq; + const struct in6_addr *dest, *src; + const struct inet_sock *inet; + unsigned long timer_expires; + const struct sock *sp; + __u16 destp, srcp; + int timer_active; + int rx_queue; + int state; + + icsk = &tp->tcp.inet_conn; + inet = &icsk->icsk_inet; + sp = &inet->sk; + fastopenq = &icsk->icsk_accept_queue.fastopenq; + + dest = &sp->sk_v6_daddr; + src = &sp->sk_v6_rcv_saddr; + destp = bpf_ntohs(inet->inet_dport); + srcp = bpf_ntohs(inet->inet_sport); + + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { + timer_active = 1; + timer_expires = icsk->icsk_timeout; + } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { + timer_active = 4; + timer_expires = icsk->icsk_timeout; + } else if (timer_pending(&sp->sk_timer)) { + timer_active = 2; + timer_expires = sp->sk_timer.expires; + } else { + timer_active = 0; + timer_expires = bpf_jiffies64(); + } + + state = sp->sk_state; + if (state == TCP_LISTEN) { + rx_queue = sp->sk_ack_backlog; + } else { + rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq; + if (rx_queue < 0) + rx_queue = 0; + } + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ", + state, + tp->tcp.write_seq - tp->tcp.snd_una, rx_queue, + timer_active, + jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()), + icsk->icsk_retransmits, uid, + icsk->icsk_probes_out, + sock_i_ino(sp), + sp->sk_refcnt.refs.counter); + BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n", + tp, + jiffies_to_clock_t(icsk->icsk_rto), + jiffies_to_clock_t(icsk->icsk_ack.ato), + (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk), + tp->tcp.snd_cwnd, + state == TCP_LISTEN ? fastopenq->max_qlen + : (tcp_in_initial_slowstart(&tp->tcp) ? -1 + : tp->tcp.snd_ssthresh) + ); + + return 0; +} + +static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw, + uid_t uid, __u32 seq_num) +{ + struct inet_timewait_sock *tw = &ttw->tw_sk; + const struct in6_addr *dest, *src; + __u16 destp, srcp; + long delta; + + delta = tw->tw_timer.expires - bpf_jiffies64(); + dest = &tw->tw_v6_daddr; + src = &tw->tw_v6_rcv_saddr; + destp = bpf_ntohs(tw->tw_dport); + srcp = bpf_ntohs(tw->tw_sport); + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + tw->tw_substate, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, + tw->tw_refcnt.refs.counter, tw); + + return 0; +} + +static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq, + uid_t uid, __u32 seq_num) +{ + struct inet_request_sock *irsk = &treq->req; + struct request_sock *req = &irsk->req; + struct in6_addr *src, *dest; + long ttd; + + ttd = req->rsk_timer.expires - bpf_jiffies64(); + src = &irsk->ir_v6_loc_addr; + dest = &irsk->ir_v6_rmt_addr; + + if (ttd < 0) + ttd = 0; + + BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + seq_num, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], + irsk->ir_num, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], + bpf_ntohs(irsk->ir_rmt_port)); + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n", + TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd), + req->num_timeout, uid, 0, 0, 0, req); + + return 0; +} + +SEC("iter/tcp") +int dump_tcp6(struct bpf_iter__tcp *ctx) +{ + struct sock_common *sk_common = ctx->sk_common; + struct seq_file *seq = ctx->meta->seq; + struct tcp_timewait_sock *tw; + struct tcp_request_sock *req; + struct tcp6_sock *tp; + uid_t uid = ctx->uid; + __u32 seq_num; + + if (sk_common == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, " sl " + "local_address " + "remote_address " + "st tx_queue rx_queue tr tm->when retrnsmt" + " uid timeout inode\n"); + + if (sk_common->skc_family != AF_INET6) + return 0; + + tp = bpf_skc_to_tcp6_sock(sk_common); + if (tp) + return dump_tcp6_sock(seq, tp, uid, seq_num); + + tw = bpf_skc_to_tcp_timewait_sock(sk_common); + if (tw) + return dump_tw_sock(seq, tw, uid, seq_num); + + req = bpf_skc_to_tcp_request_sock(sk_common); + if (req) + return dump_req_sock(seq, req, uid, seq_num); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c index 13c2c90c835f..2a4647f20c46 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c @@ -1,25 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c index 0aa71b333cf3..ee49493dc125 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c @@ -1,25 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__bpf_map +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__bpf_map { - struct bpf_iter_meta *meta; - struct bpf_map *map; -} __attribute__((preserve_access_index)); - __u32 map1_id = 0, map2_id = 0; __u32 map1_accessed = 0, map2_accessed = 0; __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h index dee1339e6905..d5e3df66ad9a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h @@ -1,27 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* Copyright (c) 2020 Facebook */ -/* "undefine" structs in vmlinux.h, because we "override" them below */ -#define bpf_iter_meta bpf_iter_meta___not_used -#define bpf_iter__task bpf_iter__task___not_used -#include "vmlinux.h" -#undef bpf_iter_meta -#undef bpf_iter__task +#include "bpf_iter.h" #include <bpf/bpf_helpers.h> char _license[] SEC("license") = "GPL"; int count = 0; -struct bpf_iter_meta { - struct seq_file *seq; - __u64 session_id; - __u64 seq_num; -} __attribute__((preserve_access_index)); - -struct bpf_iter__task { - struct bpf_iter_meta *meta; - struct task_struct *task; -} __attribute__((preserve_access_index)); - SEC("iter/task") int dump_task(struct bpf_iter__task *ctx) { diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c new file mode 100644 index 000000000000..7053784575e4 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +SEC("iter/udp") +int dump_udp4(struct bpf_iter__udp *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct udp_sock *udp_sk = ctx->udp_sk; + struct inet_sock *inet; + __u16 srcp, destp; + __be32 dest, src; + __u32 seq_num; + int rqueue; + + if (udp_sk == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode ref pointer drops\n"); + + /* filter out udp6 sockets */ + inet = &udp_sk->inet; + if (inet->sk.sk_family == AF_INET6) + return 0; + + inet = &udp_sk->inet; + dest = inet->inet_daddr; + src = inet->inet_rcv_saddr; + srcp = bpf_ntohs(inet->inet_sport); + destp = bpf_ntohs(inet->inet_dport); + rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; + + BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ", + ctx->bucket, src, srcp, dest, destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", + inet->sk.sk_state, + inet->sk.sk_wmem_alloc.refs.counter - 1, + rqueue, + 0, 0L, 0, ctx->uid, 0, + sock_i_ino(&inet->sk), + inet->sk.sk_refcnt.refs.counter, udp_sk, + inet->sk.sk_drops.counter); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c new file mode 100644 index 000000000000..c1175a6ecf43 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c @@ -0,0 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_endian.h> + +char _license[] SEC("license") = "GPL"; + +#define IPV6_SEQ_DGRAM_HEADER \ + " sl " \ + "local_address " \ + "remote_address " \ + "st tx_queue rx_queue tr tm->when retrnsmt" \ + " uid timeout inode ref pointer drops\n" + +static long sock_i_ino(const struct sock *sk) +{ + const struct socket *sk_socket = sk->sk_socket; + const struct inode *inode; + unsigned long ino; + + if (!sk_socket) + return 0; + + inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode; + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); + return ino; +} + +SEC("iter/udp") +int dump_udp6(struct bpf_iter__udp *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct udp_sock *udp_sk = ctx->udp_sk; + const struct in6_addr *dest, *src; + struct udp6_sock *udp6_sk; + struct inet_sock *inet; + __u16 srcp, destp; + __u32 seq_num; + int rqueue; + + if (udp_sk == (void *)0) + return 0; + + seq_num = ctx->meta->seq_num; + if (seq_num == 0) + BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER); + + udp6_sk = bpf_skc_to_udp6_sock(udp_sk); + if (udp6_sk == (void *)0) + return 0; + + inet = &udp_sk->inet; + srcp = bpf_ntohs(inet->inet_sport); + destp = bpf_ntohs(inet->inet_dport); + rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit; + dest = &inet->sk.sk_v6_daddr; + src = &inet->sk.sk_v6_rcv_saddr; + + BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ", + ctx->bucket, + src->s6_addr32[0], src->s6_addr32[1], + src->s6_addr32[2], src->s6_addr32[3], srcp, + dest->s6_addr32[0], dest->s6_addr32[1], + dest->s6_addr32[2], dest->s6_addr32[3], destp); + + BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n", + inet->sk.sk_state, + inet->sk.sk_wmem_alloc.refs.counter - 1, + rqueue, + 0, 0L, 0, ctx->uid, 0, + sock_i_ino(&inet->sk), + inet->sk.sk_refcnt.refs.counter, udp_sk, + inet->sk.sk_drops.counter); + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h new file mode 100644 index 000000000000..01378911252b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_NET_H__ +#define __BPF_TRACING_NET_H__ + +#define AF_INET 2 +#define AF_INET6 10 + +#define ICSK_TIME_RETRANS 1 +#define ICSK_TIME_PROBE0 3 +#define ICSK_TIME_LOSS_PROBE 5 +#define ICSK_TIME_REO_TIMEOUT 6 + +#define IFNAMSIZ 16 + +#define RTF_GATEWAY 0x0002 + +#define TCP_INFINITE_SSTHRESH 0x7fffffff +#define TCP_PINGPONG_THRESH 3 + +#define fib_nh_dev nh_common.nhc_dev +#define fib_nh_gw_family nh_common.nhc_gw_family +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 + +#define inet_daddr sk.__sk_common.skc_daddr +#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr +#define inet_dport sk.__sk_common.skc_dport + +#define ir_loc_addr req.__req_common.skc_rcv_saddr +#define ir_num req.__req_common.skc_num +#define ir_rmt_addr req.__req_common.skc_daddr +#define ir_rmt_port req.__req_common.skc_dport +#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr +#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr + +#define sk_family __sk_common.skc_family +#define sk_rmem_alloc sk_backlog.rmem_alloc +#define sk_refcnt __sk_common.skc_refcnt +#define sk_state __sk_common.skc_state +#define sk_v6_daddr __sk_common.skc_v6_daddr +#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr + +#define s6_addr32 in6_u.u6_addr32 + +#define tw_daddr __tw_common.skc_daddr +#define tw_rcv_saddr __tw_common.skc_rcv_saddr +#define tw_dport __tw_common.skc_dport +#define tw_refcnt __tw_common.skc_refcnt +#define tw_v6_daddr __tw_common.skc_v6_daddr +#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr + +#endif diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index 1ab2c5eba86c..b1b2773c0b9d 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_keepalive(struct bpf_sock_addr *ctx) +{ + int zero = 0, one = 1; + + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one))) + return 1; + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one))) + return 1; + if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one))) + return 1; + } + if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero))) + return 1; + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (bind_to_device(ctx)) return 0; + if (set_keepalive(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if (ctx->type == SOCK_STREAM) diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c new file mode 100644 index 000000000000..473665cac67e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -0,0 +1,686 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +#define LOOP_BOUND 0xf +#define MAX_ENTRIES 8 +#define HALF_ENTRIES (MAX_ENTRIES >> 1) + +_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND"); + +enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC; +__u32 g_line = 0; + +#define VERIFY_TYPE(type, func) ({ \ + g_map_type = type; \ + if (!func()) \ + return 0; \ +}) + + +#define VERIFY(expr) ({ \ + g_line = __LINE__; \ + if (!(expr)) \ + return 0; \ +}) + +struct bpf_map_memory { + __u32 pages; +} __attribute__((preserve_access_index)); + +struct bpf_map { + enum bpf_map_type map_type; + __u32 key_size; + __u32 value_size; + __u32 max_entries; + __u32 id; + struct bpf_map_memory memory; +} __attribute__((preserve_access_index)); + +static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size, + __u32 value_size, __u32 max_entries) +{ + VERIFY(map->map_type == g_map_type); + VERIFY(map->key_size == key_size); + VERIFY(map->value_size == value_size); + VERIFY(map->max_entries == max_entries); + VERIFY(map->id > 0); + VERIFY(map->memory.pages > 0); + + return 1; +} + +static inline int check_bpf_map_ptr(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(indirect->map_type == direct->map_type); + VERIFY(indirect->key_size == direct->key_size); + VERIFY(indirect->value_size == direct->value_size); + VERIFY(indirect->max_entries == direct->max_entries); + VERIFY(indirect->id == direct->id); + VERIFY(indirect->memory.pages == direct->memory.pages); + + return 1; +} + +static inline int check(struct bpf_map *indirect, struct bpf_map *direct, + __u32 key_size, __u32 value_size, __u32 max_entries) +{ + VERIFY(check_bpf_map_ptr(indirect, direct)); + VERIFY(check_bpf_map_fields(indirect, key_size, value_size, + max_entries)); + return 1; +} + +static inline int check_default(struct bpf_map *indirect, + struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + +typedef struct { + int counter; +} atomic_t; + +struct bpf_htab { + struct bpf_map map; + atomic_t count; + __u32 n_buckets; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */ + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_hash SEC(".maps"); + +static inline int check_hash(void) +{ + struct bpf_htab *hash = (struct bpf_htab *)&m_hash; + struct bpf_map *map = (struct bpf_map *)&m_hash; + int i; + + VERIFY(check_default(&hash->map, map)); + + VERIFY(hash->n_buckets == MAX_ENTRIES); + VERIFY(hash->elem_size == 64); + + VERIFY(hash->count.counter == 0); + for (i = 0; i < HALF_ENTRIES; ++i) { + const __u32 key = i; + const __u32 val = 1; + + if (bpf_map_update_elem(hash, &key, &val, 0)) + return 0; + } + VERIFY(hash->count.counter == HALF_ENTRIES); + + return 1; +} + +struct bpf_array { + struct bpf_map map; + __u32 elem_size; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_array SEC(".maps"); + +static inline int check_array(void) +{ + struct bpf_array *array = (struct bpf_array *)&m_array; + struct bpf_map *map = (struct bpf_map *)&m_array; + int i, n_lookups = 0, n_keys = 0; + + VERIFY(check_default(&array->map, map)); + + VERIFY(array->elem_size == 8); + + for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) { + const __u32 key = i; + __u32 *val = bpf_map_lookup_elem(array, &key); + + ++n_lookups; + if (val) + ++n_keys; + } + + VERIFY(n_lookups == MAX_ENTRIES); + VERIFY(n_keys == MAX_ENTRIES); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_prog_array SEC(".maps"); + +static inline int check_prog_array(void) +{ + struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array; + struct bpf_map *map = (struct bpf_map *)&m_prog_array; + + VERIFY(check_default(&prog_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_perf_event_array SEC(".maps"); + +static inline int check_perf_event_array(void) +{ + struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array; + struct bpf_map *map = (struct bpf_map *)&m_perf_event_array; + + VERIFY(check_default(&perf_event_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_hash SEC(".maps"); + +static inline int check_percpu_hash(void) +{ + struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_percpu_hash; + + VERIFY(check_default(&percpu_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_percpu_array SEC(".maps"); + +static inline int check_percpu_array(void) +{ + struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array; + struct bpf_map *map = (struct bpf_map *)&m_percpu_array; + + VERIFY(check_default(&percpu_array->map, map)); + + return 1; +} + +struct bpf_stack_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u64); +} m_stack_trace SEC(".maps"); + +static inline int check_stack_trace(void) +{ + struct bpf_stack_map *stack_trace = + (struct bpf_stack_map *)&m_stack_trace; + struct bpf_map *map = (struct bpf_map *)&m_stack_trace; + + VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64), + MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cgroup_array SEC(".maps"); + +static inline int check_cgroup_array(void) +{ + struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_array; + + VERIFY(check_default(&cgroup_array->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_hash SEC(".maps"); + +static inline int check_lru_hash(void) +{ + struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_hash; + + VERIFY(check_default(&lru_hash->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_lru_percpu_hash SEC(".maps"); + +static inline int check_lru_percpu_hash(void) +{ + struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash; + struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash; + + VERIFY(check_default(&lru_percpu_hash->map, map)); + + return 1; +} + +struct lpm_trie { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct lpm_key { + struct bpf_lpm_trie_key trie_key; + __u32 data; +}; + +struct { + __uint(type, BPF_MAP_TYPE_LPM_TRIE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __uint(max_entries, MAX_ENTRIES); + __type(key, struct lpm_key); + __type(value, __u32); +} m_lpm_trie SEC(".maps"); + +static inline int check_lpm_trie(void) +{ + struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie; + struct bpf_map *map = (struct bpf_map *)&m_lpm_trie; + + VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32), + MAX_ENTRIES)); + + return 1; +} + +struct inner_map { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + }); +} m_array_of_maps SEC(".maps") = { + .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 }, +}; + +static inline int check_array_of_maps(void) +{ + struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_array_of_maps; + + VERIFY(check_default(&array_of_maps->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); + __array(values, struct inner_map); +} m_hash_of_maps SEC(".maps") = { + .values = { + [2] = &inner_map, + }, +}; + +static inline int check_hash_of_maps(void) +{ + struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps; + struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps; + + VERIFY(check_default(&hash_of_maps->map, map)); + + return 1; +} + +struct bpf_dtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap SEC(".maps"); + +static inline int check_devmap(void) +{ + struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap; + struct bpf_map *map = (struct bpf_map *)&m_devmap; + + VERIFY(check_default(&devmap->map, map)); + + return 1; +} + +struct bpf_stab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockmap SEC(".maps"); + +static inline int check_sockmap(void) +{ + struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap; + struct bpf_map *map = (struct bpf_map *)&m_sockmap; + + VERIFY(check_default(&sockmap->map, map)); + + return 1; +} + +struct bpf_cpu_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CPUMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_cpumap SEC(".maps"); + +static inline int check_cpumap(void) +{ + struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap; + struct bpf_map *map = (struct bpf_map *)&m_cpumap; + + VERIFY(check_default(&cpumap->map, map)); + + return 1; +} + +struct xsk_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_xskmap SEC(".maps"); + +static inline int check_xskmap(void) +{ + struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap; + struct bpf_map *map = (struct bpf_map *)&m_xskmap; + + VERIFY(check_default(&xskmap->map, map)); + + return 1; +} + +struct bpf_shtab { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_sockhash SEC(".maps"); + +static inline int check_sockhash(void) +{ + struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash; + struct bpf_map *map = (struct bpf_map *)&m_sockhash; + + VERIFY(check_default(&sockhash->map, map)); + + return 1; +} + +struct bpf_cgroup_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_cgroup_storage SEC(".maps"); + +static inline int check_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage; + + VERIFY(check(&cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct reuseport_array { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_reuseport_sockarray SEC(".maps"); + +static inline int check_reuseport_sockarray(void) +{ + struct reuseport_array *reuseport_sockarray = + (struct reuseport_array *)&m_reuseport_sockarray; + struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray; + + VERIFY(check_default(&reuseport_sockarray->map, map)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE); + __type(key, struct bpf_cgroup_storage_key); + __type(value, __u32); +} m_percpu_cgroup_storage SEC(".maps"); + +static inline int check_percpu_cgroup_storage(void) +{ + struct bpf_cgroup_storage_map *percpu_cgroup_storage = + (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage; + struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage; + + VERIFY(check(&percpu_cgroup_storage->map, map, + sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0)); + + return 1; +} + +struct bpf_queue_stack { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_queue SEC(".maps"); + +static inline int check_queue(void) +{ + struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue; + struct bpf_map *map = (struct bpf_map *)&m_queue; + + VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_STACK); + __uint(max_entries, MAX_ENTRIES); + __type(value, __u32); +} m_stack SEC(".maps"); + +static inline int check_stack(void) +{ + struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack; + struct bpf_map *map = (struct bpf_map *)&m_stack; + + VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES)); + + return 1; +} + +struct bpf_sk_storage_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, __u32); + __type(value, __u32); +} m_sk_storage SEC(".maps"); + +static inline int check_sk_storage(void) +{ + struct bpf_sk_storage_map *sk_storage = + (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_map *map = (struct bpf_map *)&m_sk_storage; + + VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); + + return 1; +} + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP_HASH); + __uint(max_entries, MAX_ENTRIES); + __type(key, __u32); + __type(value, __u32); +} m_devmap_hash SEC(".maps"); + +static inline int check_devmap_hash(void) +{ + struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash; + struct bpf_map *map = (struct bpf_map *)&m_devmap_hash; + + VERIFY(check_default(&devmap_hash->map, map)); + + return 1; +} + +struct bpf_ringbuf_map { + struct bpf_map map; +} __attribute__((preserve_access_index)); + +struct { + __uint(type, BPF_MAP_TYPE_RINGBUF); + __uint(max_entries, 1 << 12); +} m_ringbuf SEC(".maps"); + +static inline int check_ringbuf(void) +{ + struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf; + struct bpf_map *map = (struct bpf_map *)&m_ringbuf; + + VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12)); + + return 1; +} + +SEC("cgroup_skb/egress") +int cg_skb(void *ctx) +{ + VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array); + VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array); + VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash); + VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie); + VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap); + VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap); + VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap); + VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash); + VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, + check_reuseport_sockarray); + VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + check_percpu_cgroup_storage); + VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue); + VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack); + VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage); + VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash); + VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf); + + return 1; +} + +__u32 _version SEC("version") = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c new file mode 100644 index 000000000000..62c8cdec6d5d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_autoload.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +bool prog1_called = false; +bool prog2_called = false; +bool prog3_called = false; + +SEC("raw_tp/sys_enter") +int prog1(const void *ctx) +{ + prog1_called = true; + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(const void *ctx) +{ + prog2_called = true; + return 0; +} + +struct fake_kernel_struct { + int whatever; +} __attribute__((preserve_access_index)); + +SEC("fentry/unexisting-kprobe-will-fail-if-loaded") +int prog3(const void *ctx) +{ + struct fake_kernel_struct *fake = (void *)ctx; + fake->whatever = 123; + prog3_called = true; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c new file mode 100644 index 000000000000..ddb687c5d125 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_endian.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define IN16 0x1234 +#define IN32 0x12345678U +#define IN64 0x123456789abcdef0ULL + +__u16 in16 = 0; +__u32 in32 = 0; +__u64 in64 = 0; + +__u16 out16 = 0; +__u32 out32 = 0; +__u64 out64 = 0; + +__u16 const16 = 0; +__u32 const32 = 0; +__u64 const64 = 0; + +SEC("raw_tp/sys_enter") +int sys_enter(const void *ctx) +{ + out16 = __builtin_bswap16(in16); + out32 = __builtin_bswap32(in32); + out64 = __builtin_bswap64(in64); + const16 = ___bpf_swab16(IN16); + const32 = ___bpf_swab32(IN32); + const64 = ___bpf_swab64(IN64); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c index 29817a703984..b6a6eb279e54 100644 --- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c +++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c @@ -57,8 +57,9 @@ struct { SEC("raw_tracepoint/sys_enter") int bpf_prog1(void *ctx) { - int max_len, max_buildid_len, usize, ksize, total_size; + int max_len, max_buildid_len, total_size; struct stack_trace_t *data; + long usize, ksize; void *raw_data; __u32 key = 0; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c new file mode 100644 index 000000000000..6c9cbb5a3bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +__u64 out__bpf_link_fops = -1; +__u64 out__bpf_link_fops1 = -1; +__u64 out__btf_size = -1; +__u64 out__per_cpu_start = -1; + +extern const void bpf_link_fops __ksym; +extern const void __start_BTF __ksym; +extern const void __stop_BTF __ksym; +extern const void __per_cpu_start __ksym; +/* non-existing symbol, weak, default to zero */ +extern const void bpf_link_fops1 __ksym __weak; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + out__bpf_link_fops = (__u64)&bpf_link_fops; + out__btf_size = (__u64)(&__stop_BTF - &__start_BTF); + out__per_cpu_start = (__u64)&__per_cpu_start; + + out__bpf_link_fops1 = (__u64)&bpf_link_fops1; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c new file mode 100644 index 000000000000..cd4b72c55dfe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_varlen.c @@ -0,0 +1,158 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#define MAX_LEN 256 + +char buf_in1[MAX_LEN] = {}; +char buf_in2[MAX_LEN] = {}; + +int test_pid = 0; +bool capture = false; + +/* .bss */ +long payload1_len1 = 0; +long payload1_len2 = 0; +long total1 = 0; +char payload1[MAX_LEN + MAX_LEN] = {}; + +/* .data */ +int payload2_len1 = -1; +int payload2_len2 = -1; +int total2 = -1; +char payload2[MAX_LEN + MAX_LEN] = { 1 }; + +int payload3_len1 = -1; +int payload3_len2 = -1; +int total3= -1; +char payload3[MAX_LEN + MAX_LEN] = { 1 }; + +int payload4_len1 = -1; +int payload4_len2 = -1; +int total4= -1; +char payload4[MAX_LEN + MAX_LEN] = { 1 }; + +SEC("raw_tp/sys_enter") +int handler64_unsigned(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload1; + u64 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload1_len2 = len; + } + + total1 = payload - (void *)payload1; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handler64_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload3; + long len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload3_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload3_len2 = len; + } + total3 = payload - (void *)payload3; + + return 0; +} + +SEC("tp/raw_syscalls/sys_enter") +int handler32_unsigned(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload2; + u32 len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len1 = len; + } + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len <= MAX_LEN) { + payload += len; + payload2_len2 = len; + } + + total2 = payload - (void *)payload2; + + return 0; +} + +SEC("tp/raw_syscalls/sys_exit") +int handler32_signed(void *regs) +{ + int pid = bpf_get_current_pid_tgid() >> 32; + void *payload = payload4; + int len; + + /* ignore irrelevant invocations */ + if (test_pid != pid || !capture) + return 0; + + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]); + if (len >= 0) { + payload += len; + payload4_len1 = len; + } + len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]); + if (len >= 0) { + payload += len; + payload4_len2 = len; + } + total4 = payload - (void *)payload4; + + return 0; +} + +SEC("tp/syscalls/sys_exit_getpid") +int handler_exit(void *regs) +{ + long bla; + + if (bpf_probe_read_kernel(&bla, sizeof(bla), 0)) + return 1; + else + return 0; +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 5611b564d3b1..29fa09d6a6c6 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) return 0; } -SEC("kprobe/hrtimer_nanosleep") -int BPF_KPROBE(handle__kprobe, - ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +SEC("kprobe/hrtimer_start_range_ns") +int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { - if (rqtp == MY_TV_NSEC) + if (tim == MY_TV_NSEC) kprobe_called = true; return 0; } -SEC("fentry/hrtimer_nanosleep") -int BPF_PROG(handle__fentry, - ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid) +SEC("fentry/hrtimer_start_range_ns") +int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns, + const enum hrtimer_mode mode) { - if (rqtp == MY_TV_NSEC) + if (tim == MY_TV_NSEC) fentry_called = true; return 0; } diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 54fa5fa688ce..104e833d0087 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -121,6 +121,24 @@ static void reset_affinity() { } } +static void save_netns(void) +{ + env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY); + if (env.saved_netns_fd == -1) { + perror("open(/proc/self/ns/net)"); + exit(-1); + } +} + +static void restore_netns(void) +{ + if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) { + stdio_restore(); + perror("setns(CLONE_NEWNS)"); + exit(-1); + } +} + void test__end_subtest() { struct prog_test_def *test = env.test; @@ -138,8 +156,6 @@ void test__end_subtest() test->test_num, test->subtest_num, test->subtest_name, sub_error_cnt ? "FAIL" : "OK"); - reset_affinity(); - free(test->subtest_name); test->subtest_name = NULL; } @@ -366,6 +382,8 @@ enum ARG_KEYS { ARG_TEST_NAME_BLACKLIST = 'b', ARG_VERIFIER_STATS = 's', ARG_VERBOSE = 'v', + ARG_GET_TEST_CNT = 'c', + ARG_LIST_TEST_NAMES = 'l', }; static const struct argp_option opts[] = { @@ -379,6 +397,10 @@ static const struct argp_option opts[] = { "Output verifier statistics", }, { "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL, "Verbose output (use -vv or -vvv for progressively verbose output)" }, + { "count", ARG_GET_TEST_CNT, NULL, 0, + "Get number of selected top-level tests " }, + { "list", ARG_LIST_TEST_NAMES, NULL, 0, + "List test names that would run (without running them) " }, {}, }; @@ -511,6 +533,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state) } } break; + case ARG_GET_TEST_CNT: + env->get_test_cnt = true; + break; + case ARG_LIST_TEST_NAMES: + env->list_test_names = true; + break; case ARGP_KEY_ARG: argp_usage(state); break; @@ -643,6 +671,7 @@ int main(int argc, char **argv) return -1; } + save_netns(); stdio_hijack(); for (i = 0; i < prog_test_cnt; i++) { struct prog_test_def *test = &prog_test_defs[i]; @@ -654,6 +683,17 @@ int main(int argc, char **argv) test->test_num, test->test_name)) continue; + if (env.get_test_cnt) { + env.succ_cnt++; + continue; + } + + if (env.list_test_names) { + fprintf(env.stdout, "%s\n", test->test_name); + env.succ_cnt++; + continue; + } + test->run_test(); /* ensure last sub-test is finalized properly */ if (test->subtest_name) @@ -673,19 +713,34 @@ int main(int argc, char **argv) test->error_cnt ? "FAIL" : "OK"); reset_affinity(); + restore_netns(); if (test->need_cgroup_cleanup) cleanup_cgroup_environment(); } stdio_restore(); + + if (env.get_test_cnt) { + printf("%d\n", env.succ_cnt); + goto out; + } + + if (env.list_test_names) + goto out; + fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n", env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt); +out: free_str_set(&env.test_selector.blacklist); free_str_set(&env.test_selector.whitelist); free(env.test_selector.num_set); free_str_set(&env.subtest_selector.blacklist); free_str_set(&env.subtest_selector.whitelist); free(env.subtest_selector.num_set); + close(env.saved_netns_fd); + + if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0) + return EXIT_FAILURE; return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS; } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index f4503c926aca..6e09bf738473 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -66,6 +66,8 @@ struct test_env { enum verbosity verbosity; bool jit_enabled; + bool get_test_cnt; + bool list_test_names; struct prog_test_def *test; FILE *stdout; @@ -78,6 +80,8 @@ struct test_env { int sub_succ_cnt; /* successful sub-tests */ int fail_cnt; /* total failed tests + sub-tests */ int skip_cnt; /* skipped tests */ + + int saved_netns_fd; }; extern struct test_env env; diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c new file mode 100644 index 000000000000..b52209db8250 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -0,0 +1,62 @@ +{ + "bpf_map_ptr: read with negative offset rejected", + .insns = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "R1 is bpf_array invalid negative access: off=-8", +}, +{ + "bpf_map_ptr: write rejected", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 3 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "only read from bpf_array is supported", +}, +{ + "bpf_map_ptr: read non-existent field rejected", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = REJECT, + .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4", +}, +{ + "bpf_map_ptr: read ops field accepted", + .insns = { + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0), + BPF_MOV64_IMM(BPF_REG_0, 1), + BPF_EXIT_INSN(), + }, + .fixup_map_array_48b = { 1 }, + .result_unpriv = REJECT, + .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN", + .result = ACCEPT, + .retval = 1, +}, diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c index cd26ee6b7b1d..1f2b8c4cb26d 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c @@ -56,7 +56,7 @@ .fixup_map_in_map = { 16 }, .fixup_map_array_48b = { 13 }, .result = REJECT, - .errstr = "R0 invalid mem access 'map_ptr'", + .errstr = "only read from bpf_array is supported", }, { "cond: two branches returning different map pointers for lookup (tail, tail)", diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c index 97ee658e1242..ed4e76b24649 100644 --- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c +++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c @@ -836,3 +836,41 @@ .errstr = "R0 invalid mem access 'inv'", .errstr_unpriv = "R0 pointer -= pointer prohibited", }, +{ + "32bit pkt_ptr -= scalar", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7), + BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, +{ + "32bit scalar -= pkt_ptr", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1, + offsetof(struct __sk_buff, data_end)), + BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1, + offsetof(struct __sk_buff, data)), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40), + BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2), + BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6), + BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, +}, diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh new file mode 100644 index 000000000000..ba1d53b9f815 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -0,0 +1,786 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +VNI_GEN=$RANDOM +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID +NSIM_NETDEV= +HAS_ETHTOOL= +EXIT_STATUS=0 +num_cases=0 +num_errors=0 + +clean_up_devs=( ) + +function err_cnt { + echo "ERROR:" $@ + EXIT_STATUS=1 + ((num_errors++)) + ((num_cases++)) +} + +function pass_cnt { + ((num_cases++)) +} + +function cleanup_tuns { + for dev in "${clean_up_devs[@]}"; do + [ -e /sys/class/net/$dev ] && ip link del dev $dev + done + clean_up_devs=( ) +} + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_tuns + cleanup_nsim +} + +trap cleanup EXIT + +function new_vxlan { + local dev=$1 + local dstport=$2 + local lower=$3 + local ipver=$4 + local flags=$5 + + local group ipfl + + [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))" + + ip $ipfl link add $dev type vxlan \ + group $group \ + dev $lower \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function new_geneve { + local dev=$1 + local dstport=$2 + local ipver=$3 + local flags=$4 + + local group ipfl + + [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2 + [ "$ipver" != '6' ] || ipfl="-6" + + [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))" + + ip $ipfl link add $dev type geneve \ + remote $remote \ + dstport $dstport \ + $flags + + ip link set dev $dev up + + clean_up_devs=("${clean_up_devs[@]}" $dev) + + check_tables +} + +function del_dev { + local dev=$1 + + ip link del dev $dev + check_tables +} + +# Helpers for netdevsim port/type encoding +function mke { + local port=$1 + local type=$2 + + echo $((port << 16 | type)) +} + +function pre { + local val=$1 + + echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))" +} + +function pre_ethtool { + local val=$1 + local port=$((val >> 16)) + local type=$((val & 0xffff)) + + case $type in + 1) + type_name="vxlan" + ;; + 2) + type_name="geneve" + ;; + 4) + type_name="vxlan-gpe" + ;; + *) + type_name="bit X" + ;; + esac + + echo "port $port, $type_name" +} + +function check_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + local -n expected=$2 + local last=$3 + + read -a have < $path + + if [ ${#expected[@]} -ne ${#have[@]} ]; then + echo "check_table: BAD NUMBER OF ITEMS" + return 0 + fi + + for i in "${!expected[@]}"; do + if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then + pp_expected=`pre_ethtool ${expected[i]}` + ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null + if [ $? -ne 0 -a $last -ne 0 ]; then + err_cnt "ethtool table $1 on port $port: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + return 0 + + fi + fi + + if [ ${expected[i]} != ${have[i]} ]; then + if [ $last -ne 0 ]; then + err_cnt "table $1 on port $port: $pfx - $msg" + echo " check_table: wrong entry $i" + echo " expected: `pre ${expected[i]}`" + echo " have: `pre ${have[i]}`" + return 0 + fi + return 1 + fi + done + + pass_cnt + return 0 +} + +function check_tables { + # Need retries in case we have workqueue making the changes + local retries=10 + + while ! check_table 0 exp0 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done + while ! check_table 1 exp1 $((retries == 0)); do + sleep 0.02 + ((retries--)) + done +} + +function print_table { + local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1 + read -a have < $path + + tree $NSIM_DEV_DFS/ + + echo "Port $port table $1:" + + for i in "${!have[@]}"; do + echo " `pre ${have[i]}`" + done + +} + +function print_tables { + print_table 0 + print_table 1 +} + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +### +### Code start +### + +# Probe ethtool support +ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y + +modprobe netdevsim + +# Basic test +pfx="basic" + +for port in 0 1; do + old_netdevs=$(ls /sys/class/net) + if [ $port -eq 0 ]; then + echo $NSIM_ID > /sys/bus/netdevsim/new_device + else + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + echo 1 > $NSIM_DEV_SYS/new_port + fi + NSIM_NETDEV=`get_netdev_name old_netdevs` + + msg="new NIC device created" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + msg="VxLAN v4 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlan0 4789 $NSIM_NETDEV + new_vxlan vxlan1 4789 $NSIM_NETDEV + + msg="VxLAN v4 devices go down" + exp0=( 0 0 0 0 ) + ifconfig vxlan1 down + ifconfig vxlan0 down + check_tables + + msg="VxLAN v6 devices" + exp0=( `mke 4789 1` 0 0 0 ) + new_vxlan vxlanA 4789 $NSIM_NETDEV 6 + + for ifc in vxlan0 vxlan1; do + ifconfig $ifc up + done + + new_vxlan vxlanB 4789 $NSIM_NETDEV 6 + + msg="another VxLAN v6 devices" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + + msg="Geneve device" + exp1=( `mke 6081 2` 0 0 0 ) + new_geneve gnv0 6081 + + msg="NIC device goes down" + ifconfig $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + msg="NIC device goes up again" + ifconfig $NSIM_NETDEV up + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + check_tables + + cleanup_tuns + + msg="tunnels destroyed" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + check_tables + + modprobe -r geneve + modprobe -r vxlan + modprobe -r udp_tunnel + + check_tables +done + +modprobe -r netdevsim + +# Module tests +pfx="module tests" + +if modinfo netdevsim | grep udp_tunnel >/dev/null; then + err_cnt "netdevsim depends on udp_tunnel" +else + pass_cnt +fi + +modprobe netdevsim + +old_netdevs=$(ls /sys/class/net) +port=0 +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep +echo 0 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="create VxLANs" +exp0=( 0 0 0 0 ) # sleep is longer than out wait +new_vxlan vxlan0 10000 $NSIM_NETDEV + +modprobe -r vxlan +modprobe -r udp_tunnel + +msg="remove tunnels" +exp0=( 0 0 0 0 ) +check_tables + +msg="create VxLANs" +exp0=( 0 0 0 0 ) # sleep is longer than out wait +new_vxlan vxlan0 10000 $NSIM_NETDEV + +exp0=( 0 0 0 0 ) + +modprobe -r netdevsim +modprobe netdevsim + +# Overflow the table + +function overflow_table0 { + local pfx=$1 + + msg="create VxLANs 1/5" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="create VxLANs 2/5" + exp0=( `mke 10000 1` `mke 10001 1` 0 0 ) + new_vxlan vxlan1 10001 $NSIM_NETDEV + + msg="create VxLANs 3/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 ) + new_vxlan vxlan2 10002 $NSIM_NETDEV + + msg="create VxLANs 4/5" + exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` ) + new_vxlan vxlan3 10003 $NSIM_NETDEV + + msg="create VxLANs 5/5" + new_vxlan vxlan4 10004 $NSIM_NETDEV +} + +function overflow_table1 { + local pfx=$1 + + msg="create GENEVE 1/5" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="create GENEVE 2/5" + exp1=( `mke 20000 2` `mke 20001 2` 0 0 ) + new_geneve gnv1 20001 + + msg="create GENEVE 3/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 ) + new_geneve gnv2 20002 + + msg="create GENEVE 4/5" + exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` ) + new_geneve gnv3 20003 + + msg="create GENEVE 5/5" + new_geneve gnv4 20004 +} + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Sync all +pfx="sync all" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "overflow NIC table" + overflow_table1 "overflow NIC table" + + msg="replace VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) + del_dev vxlan1 + + msg="vacate VxLAN in overflow table" + exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) + del_dev vxlan2 + + msg="replace GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` ) + del_dev gnv1 + + msg="vacate GENEVE in overflow table" + exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` ) + del_dev gnv2 + + msg="table sharing - share" + exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` ) + new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external" + + msg="table sharing - overflow" + new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external" + msg="table sharing - overflow v6" + new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external" + + exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` ) + del_dev gnv4 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Destroy full NIC +pfx="destroy full" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + overflow_table0 "destroy NIC" + overflow_table1 "destroy NIC" + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# IPv4 only +pfx="IPv4 only" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port +echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v6" + new_vxlan vxlanA1 10000 $NSIM_NETDEV 6 + + ip link set dev vxlanA0 down + ip link set dev vxlanA0 up + check_tables + + msg="create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="down VxLANs v4" + exp0=( 0 0 0 0 ) + ip link set dev vxlan0 down + check_tables + + msg="up VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + ip link set dev vxlan0 up + check_tables + + msg="destroy VxLANs v4" + exp0=( 0 0 0 0 ) + del_dev vxlan0 + + msg="recreate VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + del_dev vxlanA0 + del_dev vxlanA1 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# Failures +pfx="error injection" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + + msg="1 - create VxLANs v6" + exp0=( 0 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="1 - create VxLANs v4" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="1 - remove VxLANs v4" + del_dev vxlan0 + + msg="1 - remove VxLANs v6" + exp0=( 0 0 0 0 ) + del_dev vxlanA0 + + msg="2 - create GENEVE" + exp1=( `mke 20000 2` 0 0 0 ) + new_geneve gnv0 20000 + + msg="2 - destroy GENEVE" + echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error + exp1=( `mke 20000 2` 0 0 0 ) + del_dev gnv0 + + msg="2 - create second GENEVE" + exp1=( 0 `mke 20001 2` 0 0 ) + new_geneve gnv0 20001 + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# netdev flags +pfx="netdev flags" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="turn off" + exp0=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + + msg="create VxLANs v4 - off" + exp0=( 0 0 0 0 ) + new_vxlan vxlan0 10000 $NSIM_NETDEV + + msg="created off - turn on" + exp0=( `mke 10000 1` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +cleanup_nsim + +# device initiated reset +pfx="reset notification" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +for port in 0 1; do + if [ $port -ne 0 ]; then + echo 1 > $NSIM_DEV_DFS/udp_ports_open_only + echo 1 > $NSIM_DEV_DFS/udp_ports_sleep + fi + + echo $port > $NSIM_DEV_SYS/new_port + ifconfig $NSIM_NETDEV up + + msg="create VxLANs v6" + exp0=( `mke 10000 1` 0 0 0 ) + new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 + + msg="create VxLANs v4" + new_vxlan vxlan0 10000 $NSIM_NETDEV + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="NIC device goes down" + ifconfig $NSIM_NETDEV down + if [ $port -eq 1 ]; then + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + fi + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="NIC device goes up again" + ifconfig $NSIM_NETDEV up + exp0=( `mke 10000 1` 0 0 0 ) + check_tables + + msg="remove both" + del_dev vxlanA0 + exp0=( 0 0 0 0 ) + del_dev vxlan0 + check_tables + + echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset + check_tables + + msg="destroy NIC" + echo $port > $NSIM_DEV_SYS/del_port + + cleanup_tuns + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) +done + +modprobe -r netdevsim + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $num_cases checks" +else + echo "FAILED $num_errors/$num_cases checks" +fi + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 895ec992b2f1..9491bbaa0831 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -17,6 +17,8 @@ TEST_PROGS += route_localnet.sh TEST_PROGS += reuseaddr_ports_exhausted.sh TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh +TEST_PROGS += rxtimestamp.sh +TEST_PROGS += devlink_port_split.py TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py new file mode 100755 index 000000000000..58bb7e9b88ce --- /dev/null +++ b/tools/testing/selftests/net/devlink_port_split.py @@ -0,0 +1,277 @@ +#!/usr/bin/python3 +# SPDX-License-Identifier: GPL-2.0 + +from subprocess import PIPE, Popen +import json +import time +import argparse +import collections +import sys + +# +# Test port split configuration using devlink-port lanes attribute. +# The test is skipped in case the attribute is not available. +# +# First, check that all the ports with 1 lane fail to split. +# Second, check that all the ports with more than 1 lane can be split +# to all valid configurations (e.g., split to 2, split to 4 etc.) +# + + +Port = collections.namedtuple('Port', 'bus_info name') + + +def run_command(cmd, should_fail=False): + """ + Run a command in subprocess. + Return: Tuple of (stdout, stderr). + """ + + p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True) + stdout, stderr = p.communicate() + stdout, stderr = stdout.decode(), stderr.decode() + + if stderr != "" and not should_fail: + print("Error sending command: %s" % cmd) + print(stdout) + print(stderr) + return stdout, stderr + + +class devlink_ports(object): + """ + Class that holds information on the devlink ports, required to the tests; + if_names: A list of interfaces in the devlink ports. + """ + + def get_if_names(dev): + """ + Get a list of physical devlink ports. + Return: Array of tuples (bus_info/port, if_name). + """ + + arr = [] + + cmd = "devlink -j port show" + stdout, stderr = run_command(cmd) + assert stderr == "" + ports = json.loads(stdout)['port'] + + for port in ports: + if dev in port: + if ports[port]['flavour'] == 'physical': + arr.append(Port(bus_info=port, name=ports[port]['netdev'])) + + return arr + + def __init__(self, dev): + self.if_names = devlink_ports.get_if_names(dev) + + +def get_max_lanes(port): + """ + Get the $port's maximum number of lanes. + Return: number of lanes, e.g. 1, 2, 4 and 8. + """ + + cmd = "devlink -j port show %s" % port + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + if 'lanes' in values: + lanes = values['lanes'] + else: + lanes = 0 + return lanes + + +def get_split_ability(port): + """ + Get the $port split ability. + Return: split ability, true or false. + """ + + cmd = "devlink -j port show %s" % port.name + stdout, stderr = run_command(cmd) + assert stderr == "" + values = list(json.loads(stdout)['port'].values())[0] + + return values['splittable'] + + +def split(k, port, should_fail=False): + """ + Split $port into $k ports. + If should_fail == True, the split should fail. Otherwise, should pass. + Return: Array of sub ports after splitting. + If the $port wasn't split, the array will be empty. + """ + + cmd = "devlink port split %s count %s" % (port.bus_info, k) + stdout, stderr = run_command(cmd, should_fail=should_fail) + + if should_fail: + if not test(stderr != "", "%s is unsplittable" % port.name): + print("split an unsplittable port %s" % port.name) + return create_split_group(port, k) + else: + if stderr == "": + return create_split_group(port, k) + print("didn't split a splittable port %s" % port.name) + + return [] + + +def unsplit(port): + """ + Unsplit $port. + """ + + cmd = "devlink port unsplit %s" % port + stdout, stderr = run_command(cmd) + test(stderr == "", "Unsplit port %s" % port) + + +def exists(port, dev): + """ + Check if $port exists in the devlink ports. + Return: True is so, False otherwise. + """ + + return any(dev_port.name == port + for dev_port in devlink_ports.get_if_names(dev)) + + +def exists_and_lanes(ports, lanes, dev): + """ + Check if every port in the list $ports exists in the devlink ports and has + $lanes number of lanes after splitting. + Return: True if both are True, False otherwise. + """ + + for port in ports: + max_lanes = get_max_lanes(port) + if not exists(port, dev): + print("port %s doesn't exist in devlink ports" % port) + return False + if max_lanes != lanes: + print("port %s has %d lanes, but %s were expected" + % (port, lanes, max_lanes)) + return False + return True + + +def test(cond, msg): + """ + Check $cond and print a message accordingly. + Return: True is pass, False otherwise. + """ + + if cond: + print("TEST: %-60s [ OK ]" % msg) + else: + print("TEST: %-60s [FAIL]" % msg) + + return cond + + +def create_split_group(port, k): + """ + Create the split group for $port. + Return: Array with $k elements, which are the split port group. + """ + + return list(port.name + "s" + str(i) for i in range(k)) + + +def split_unsplittable_port(port, k): + """ + Test that splitting of unsplittable port fails. + """ + + # split to max + new_split_group = split(k, port, should_fail=True) + + if new_split_group != []: + unsplit(port.bus_info) + + +def split_splittable_port(port, k, lanes, dev): + """ + Test that splitting of splittable port passes correctly. + """ + + new_split_group = split(k, port) + + # Once the split command ends, it takes some time to the sub ifaces' + # to get their names. Use udevadm to continue only when all current udev + # events are handled. + cmd = "udevadm settle" + stdout, stderr = run_command(cmd) + assert stderr == "" + + if new_split_group != []: + test(exists_and_lanes(new_split_group, lanes/k, dev), + "split port %s into %s" % (port.name, k)) + + unsplit(port.bus_info) + + +def make_parser(): + parser = argparse.ArgumentParser(description='A test for port splitting.') + parser.add_argument('--dev', + help='The devlink handle of the device under test. ' + + 'The default is the first registered devlink ' + + 'handle.') + + return parser + + +def main(cmdline=None): + parser = make_parser() + args = parser.parse_args(cmdline) + + dev = args.dev + if not dev: + cmd = "devlink -j dev show" + stdout, stderr = run_command(cmd) + assert stderr == "" + + devs = json.loads(stdout)['dev'] + dev = list(devs.keys())[0] + + cmd = "devlink dev show %s" % dev + stdout, stderr = run_command(cmd) + if stderr != "": + print("devlink device %s can not be found" % dev) + sys.exit(1) + + ports = devlink_ports(dev) + + for port in ports.if_names: + max_lanes = get_max_lanes(port.name) + + # If max lanes is 0, do not test port splitting at all + if max_lanes == 0: + continue + + # If 1 lane, shouldn't be able to split + elif max_lanes == 1: + test(not get_split_ability(port), + "%s should not be able to split" % port.name) + split_unsplittable_port(port, max_lanes) + + # Else, splitting should pass and all the split ports should exist. + else: + lane = max_lanes + test(get_split_ability(port), + "%s should be able to split" % port.name) + while lane > 1: + split_splittable_port(port, lane, max_lanes, dev) + + lane //= 2 + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh index eb8e2a23bbb4..ea7a11a9f788 100755 --- a/tools/testing/selftests/net/forwarding/ethtool.sh +++ b/tools/testing/selftests/net/forwarding/ethtool.sh @@ -50,23 +50,6 @@ cleanup() h1_destroy } -different_speeds_get() -{ - local dev1=$1; shift - local dev2=$1; shift - local with_mode=$1; shift - local adver=$1; shift - - local -a speeds_arr - - speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) - if [[ ${#speeds_arr[@]} < 2 ]]; then - check_err 1 "cannot check different speeds. There are not enough speeds" - fi - - echo ${speeds_arr[0]} ${speeds_arr[1]} -} - same_speeds_autoneg_off() { # Check that when each of the reported speeds is forced, the links come diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh new file mode 100755 index 000000000000..4b42dfd4efd1 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh @@ -0,0 +1,102 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + autoneg + autoneg_force_mode + no_cable +" + +NUM_NETIFS=2 +source lib.sh +source ethtool_lib.sh + +setup_prepare() +{ + swp1=${NETIFS[p1]} + swp2=${NETIFS[p2]} + swp3=$NETIF_NO_CABLE +} + +ethtool_extended_state_check() +{ + local dev=$1; shift + local expected_ext_state=$1; shift + local expected_ext_substate=${1:-""}; shift + + local ext_state=$(ethtool $dev | grep "Link detected" \ + | cut -d "(" -f2 | cut -d ")" -f1) + local ext_substate=$(echo $ext_state | cut -sd "," -f2 \ + | sed -e 's/^[[:space:]]*//') + ext_state=$(echo $ext_state | cut -d "," -f1) + + [[ $ext_state == $expected_ext_state ]] + check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\"" + + [[ $ext_substate == $expected_ext_substate ]] + check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\"" +} + +autoneg() +{ + RET=0 + + ip link set dev $swp1 up + + sleep 4 + ethtool_extended_state_check $swp1 "Autoneg" "No partner detected" + + log_test "Autoneg, No partner detected" + + ip link set dev $swp1 down +} + +autoneg_force_mode() +{ + RET=0 + + ip link set dev $swp1 up + ip link set dev $swp2 up + + local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0)) + local speed1=${speeds_arr[0]} + local speed2=${speeds_arr[1]} + + ethtool_set $swp1 speed $speed1 autoneg off + ethtool_set $swp2 speed $speed2 autoneg off + + sleep 4 + ethtool_extended_state_check $swp1 "Autoneg" \ + "No partner detected during force mode" + + ethtool_extended_state_check $swp2 "Autoneg" \ + "No partner detected during force mode" + + log_test "Autoneg, No partner detected during force mode" + + ethtool -s $swp2 autoneg on + ethtool -s $swp1 autoneg on + + ip link set dev $swp2 down + ip link set dev $swp1 down +} + +no_cable() +{ + RET=0 + + ip link set dev $swp3 up + + sleep 1 + ethtool_extended_state_check $swp3 "No cable" + + log_test "No cable" + + ip link set dev $swp3 down +} + +setup_prepare + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh index 925d229a59d8..9188e624dec0 100644 --- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh +++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh @@ -67,3 +67,20 @@ common_speeds_get() <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \ <(printf '%s\n' "${dev2_speeds[@]}" | sort -u) } + +different_speeds_get() +{ + local dev1=$1; shift + local dev2=$1; shift + local with_mode=$1; shift + local adver=$1; shift + + local -a speeds_arr + + speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver)) + if [[ ${#speeds_arr[@]} < 2 ]]; then + check_err 1 "cannot check different speeds. There are not enough speeds" + fi + + echo ${speeds_arr[0]} ${speeds_arr[1]} +} diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample index e2adb533c8fc..b802c14d2950 100644 --- a/tools/testing/selftests/net/forwarding/forwarding.config.sample +++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample @@ -14,6 +14,9 @@ NETIFS[p6]=veth5 NETIFS[p7]=veth6 NETIFS[p8]=veth7 +# Port that does not have a cable connected. +NETIF_NO_CABLE=eth8 + ############################################################################## # Defines diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh new file mode 100755 index 000000000000..5f20d289ee43 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh @@ -0,0 +1,198 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the +# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the +# packet looks like expected. +# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_udp_sport + test_udp_dport + test_tcp_sport + test_tcp_dport +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +ping_ipv6() +{ + ping6_test $h1 2001:db8:1::2 +} + +do_test_pedit_l4port_one() +{ + local pedit_locus=$1; shift + local pedit_prot=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + local saddr=$1; shift + local daddr=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + RET=0 + + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345 + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + + pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101) + ((pkts >= 10)) + check_err $? "Expected to get 10 packets on pedit rule, but got $pkts." + + log_test "$pedit_locus pedit $pedit_action" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_pedit_l4port() +{ + local locus=$1; shift + local prot=$1; shift + local pedit_port=$1; shift + local flower_port=$1; shift + local port + + for port in 1 11111 65535; do + do_test_pedit_l4port_one "$locus" "$prot" \ + "$prot $pedit_port set $port" \ + ip "ip_proto $prot $flower_port $port" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_udp_sport() +{ + do_test_pedit_l4port "dev $swp1 ingress" udp sport src_port + do_test_pedit_l4port "dev $swp2 egress" udp sport src_port +} + +test_udp_dport() +{ + do_test_pedit_l4port "dev $swp1 ingress" udp dport dst_port + do_test_pedit_l4port "dev $swp2 egress" udp dport dst_port +} + +test_tcp_sport() +{ + do_test_pedit_l4port "dev $swp1 ingress" tcp sport src_port + do_test_pedit_l4port "dev $swp2 egress" tcp sport src_port +} + +test_tcp_dport() +{ + do_test_pedit_l4port "dev $swp1 ingress" tcp dport dst_port + do_test_pedit_l4port "dev $swp2 egress" tcp dport dst_port +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh new file mode 100755 index 000000000000..e714bae473fb --- /dev/null +++ b/tools/testing/selftests/net/forwarding/sch_red.sh @@ -0,0 +1,492 @@ +# SPDX-License-Identifier: GPL-2.0 + +# This test sends one stream of traffic from H1 through a TBF shaper, to a RED +# within TBF shaper on $swp3. The two shapers have the same configuration, and +# thus the resulting stream should fill all available bandwidth on the latter +# shaper. A second stream is sent from H2 also via $swp3, and used to inject +# additional traffic. Since all available bandwidth is taken, this traffic has +# to go to backlog. +# +# +--------------------------+ +--------------------------+ +# | H1 | | H2 | +# | + $h1 | | + $h2 | +# | | 192.0.2.1/28 | | | 192.0.2.2/28 | +# | | TBF 10Mbps | | | | +# +-----|--------------------+ +-----|--------------------+ +# | | +# +-----|------------------------------------------------|--------------------+ +# | SW | | | +# | +--|------------------------------------------------|----------------+ | +# | | + $swp1 + $swp2 | | +# | | BR | | +# | | | | +# | | + $swp3 | | +# | | | TBF 10Mbps / RED | | +# | +--------------------------------|-----------------------------------+ | +# | | | +# +-----------------------------------|---------------------------------------+ +# | +# +-----|--------------------+ +# | H3 | | +# | + $h1 | +# | 192.0.2.3/28 | +# | | +# +--------------------------+ + +ALL_TESTS=" + ping_ipv4 + ecn_test + ecn_nodrop_test + red_test + red_qevent_test + ecn_qevent_test +" + +NUM_NETIFS=6 +CHECK_TC="yes" +source lib.sh + +BACKLOG=30000 +PKTSZ=1400 + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 + mtu_set $h1 10000 + tc qdisc replace dev $h1 root handle 1: tbf \ + rate 10Mbit burst 10K limit 1M +} + +h1_destroy() +{ + tc qdisc del dev $h1 root + mtu_restore $h1 + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 + mtu_set $h2 10000 +} + +h2_destroy() +{ + mtu_restore $h2 + simple_if_fini $h2 192.0.2.2/28 +} + +h3_create() +{ + simple_if_init $h3 192.0.2.3/28 + mtu_set $h3 10000 +} + +h3_destroy() +{ + mtu_restore $h3 + simple_if_fini $h3 192.0.2.3/28 +} + +switch_create() +{ + ip link add dev br up type bridge + ip link set dev $swp1 up master br + ip link set dev $swp2 up master br + ip link set dev $swp3 up master br + + mtu_set $swp1 10000 + mtu_set $swp2 10000 + mtu_set $swp3 10000 + + tc qdisc replace dev $swp3 root handle 1: tbf \ + rate 10Mbit burst 10K limit 1M + ip link add name _drop_test up type dummy +} + +switch_destroy() +{ + ip link del dev _drop_test + tc qdisc del dev $swp3 root + + mtu_restore $h3 + mtu_restore $h2 + mtu_restore $h1 + + ip link set dev $swp3 down nomaster + ip link set dev $swp2 down nomaster + ip link set dev $swp1 down nomaster + ip link del dev br +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + h2=${NETIFS[p3]} + swp2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + h3=${NETIFS[p6]} + + h3_mac=$(mac_get $h3) + + vrf_prepare + + h1_create + h2_create + h3_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h3_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.3 " from host 1" + ping_test $h2 192.0.2.3 " from host 2" +} + +get_qdisc_backlog() +{ + qdisc_stats_get $swp3 11: .backlog +} + +get_nmarked() +{ + qdisc_stats_get $swp3 11: .marked +} + +get_qdisc_npackets() +{ + qdisc_stats_get $swp3 11: .packets +} + +get_nmirrored() +{ + link_stats_get _drop_test tx packets +} + +send_packets() +{ + local proto=$1; shift + local pkts=$1; shift + + $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@" +} + +# This sends traffic in an attempt to build a backlog of $size. Returns 0 on +# success. After 10 failed attempts it bails out and returns 1. It dumps the +# backlog size to stdout. +build_backlog() +{ + local size=$1; shift + local proto=$1; shift + + local i=0 + + while :; do + local cur=$(get_qdisc_backlog) + local diff=$((size - cur)) + local pkts=$(((diff + PKTSZ - 1) / PKTSZ)) + + if ((cur >= size)); then + echo $cur + return 0 + elif ((i++ > 10)); then + echo $cur + return 1 + fi + + send_packets $proto $pkts "$@" + sleep 1 + done +} + +check_marking() +{ + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets) + local nmarked_0=$(get_nmarked) + sleep 5 + local npackets_1=$(get_qdisc_npackets) + local nmarked_1=$(get_nmarked) + + local nmarked_d=$((nmarked_1 - nmarked_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmarked_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +check_mirroring() +{ + local cond=$1; shift + + local npackets_0=$(get_qdisc_npackets) + local nmirrored_0=$(get_nmirrored) + sleep 5 + local npackets_1=$(get_qdisc_npackets) + local nmirrored_1=$(get_nmirrored) + + local nmirrored_d=$((nmirrored_1 - nmirrored_0)) + local npackets_d=$((npackets_1 - npackets_0)) + local pct=$((100 * nmirrored_d / npackets_d)) + + echo $pct + ((pct $cond)) +} + +ecn_test_common() +{ + local name=$1; shift + local limit=$1; shift + local backlog + local pct + + # Build the below-the-limit backlog using UDP. We could use TCP just + # fine, but this way we get a proof that UDP is accepted when queue + # length is below the limit. The main stream is using TCP, and if the + # limit is misconfigured, we would see this traffic being ECN marked. + RET=0 + backlog=$(build_backlog $((2 * limit / 3)) udp) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "$name backlog < limit" + + # Now push TCP, because non-TCP traffic would be early-dropped after the + # backlog crosses the limit, and we want to make sure that the backlog + # is above the limit. + RET=0 + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95." + log_test "$name backlog > limit" +} + +do_ecn_test() +{ + local limit=$1; shift + local name=ECN + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + ecn_test_common "$name" $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, it should all get dropped, and backlog + # building should fail. + RET=0 + build_backlog $((2 * limit)) udp >/dev/null + check_fail $? "UDP traffic went into backlog instead of being early-dropped" + log_test "$name backlog > limit: UDP early-dropped" + + stop_traffic + sleep 1 +} + +do_ecn_nodrop_test() +{ + local limit=$1; shift + local name="ECN nodrop" + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + ecn_test_common "$name" $limit + + # Up there we saw that UDP gets accepted when backlog is below the + # limit. Now that it is above, in nodrop mode, make sure it goes to + # backlog as well. + RET=0 + build_backlog $((2 * limit)) udp >/dev/null + check_err $? "UDP traffic was early-dropped instead of getting into backlog" + log_test "$name backlog > limit: UDP not dropped" + + stop_traffic + sleep 1 +} + +do_red_test() +{ + local limit=$1; shift + local backlog + local pct + + # Use ECN-capable TCP to verify there's no marking even though the queue + # is above limit. + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + + # Pushing below the queue limit should work. + RET=0 + backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "RED backlog < limit" + + # Pushing above should not. + RET=0 + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_fail $? "Traffic went into backlog instead of being early-dropped" + pct=$(check_marking "== 0") + check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0." + log_test "RED backlog > limit" + + stop_traffic + sleep 1 +} + +do_red_qevent_test() +{ + local limit=$1; shift + local backlog + local base + local now + local pct + + RET=0 + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t udp -q & + sleep 1 + + tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ + action mirred egress mirror dev _drop_test + + # Push to the queue until it's at the limit. The configured limit is + # rounded by the qdisc, so this is the best we can do to get to the real + # limit. + build_backlog $((3 * limit / 2)) udp >/dev/null + + base=$(get_nmirrored) + send_packets udp 100 + sleep 1 + now=$(get_nmirrored) + ((now >= base + 100)) + check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen" + + tc filter del block 10 pref 1234 handle 102 matchall + + base=$(get_nmirrored) + send_packets udp 100 + sleep 1 + now=$(get_nmirrored) + ((now == base)) + check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen" + + log_test "RED early_dropped packets mirrored" + + stop_traffic + sleep 1 +} + +do_ecn_qevent_test() +{ + local limit=$1; shift + local name=ECN + + RET=0 + + $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \ + -a own -b $h3_mac -t tcp -q tos=0x01 & + sleep 1 + + tc filter add block 10 pref 1234 handle 102 matchall skip_hw \ + action mirred egress mirror dev _drop_test + + backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_mirroring "== 0") + check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0." + + backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01) + check_err $? "Could not build the requested backlog" + pct=$(check_mirroring ">= 95") + check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95." + + tc filter del block 10 pref 1234 handle 102 matchall + + log_test "ECN marked packets mirrored" + + stop_traffic + sleep 1 +} + +install_qdisc() +{ + local -a args=("$@") + + tc qdisc replace dev $swp3 parent 1:1 handle 11: red \ + limit 1M avpkt $PKTSZ probability 1 \ + min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}" + sleep 1 +} + +uninstall_qdisc() +{ + tc qdisc del dev $swp3 parent 1:1 +} + +ecn_test() +{ + install_qdisc ecn + do_ecn_test $BACKLOG + uninstall_qdisc +} + +ecn_nodrop_test() +{ + install_qdisc ecn nodrop + do_ecn_nodrop_test $BACKLOG + uninstall_qdisc +} + +red_test() +{ + install_qdisc + do_red_test $BACKLOG + uninstall_qdisc +} + +red_qevent_test() +{ + install_qdisc qevent early_drop block 10 + do_red_qevent_test $BACKLOG + uninstall_qdisc +} + +ecn_qevent_test() +{ + install_qdisc ecn qevent mark block 10 + do_ecn_qevent_test $BACKLOG + uninstall_qdisc +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index f50976ee7d44..aa254aefc2c3 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -5,7 +5,7 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh new file mode 100755 index 000000000000..39edce4f541c --- /dev/null +++ b/tools/testing/selftests/net/mptcp/diag.sh @@ -0,0 +1,121 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns="ns1-$rndh" +ksft_skip=4 +test_cnt=1 +ret=0 +pids=() + +flush_pids() +{ + # mptcp_connect in join mode will sleep a bit before completing, + # give it some time + sleep 1.1 + + for pid in ${pids[@]}; do + [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1 + done + pids=() +} + +cleanup() +{ + ip netns del $ns + for pid in ${pids[@]}; do + [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1 + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi +ss -h | grep -q MPTCP +if [ $? -ne 0 ];then + echo "SKIP: ss tool does not support MPTCP" + exit $ksft_skip +fi + +__chk_nr() +{ + local condition="$1" + local expected=$2 + local msg nr + + shift 2 + msg=$* + nr=$(ss -inmHMN $ns | $condition) + + printf "%-50s" "$msg" + if [ $nr != $expected ]; then + echo "[ fail ] expected $expected found $nr" + ret=$test_cnt + else + echo "[ ok ]" + fi + test_cnt=$((test_cnt+1)) +} + +chk_msk_nr() +{ + __chk_nr "grep -c token:" $* +} + +chk_msk_fallback_nr() +{ + __chk_nr "grep -c fallback" $* +} + +chk_msk_remote_key_nr() +{ + __chk_nr "grep -c remote_key" $* +} + + +trap cleanup EXIT +ip netns add $ns +ip -n $ns link set dev lo up + +echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null & +sleep 0.1 +pids[0]=$! +chk_msk_nr 0 "no msk on netns creation" + +echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null & +sleep 0.1 +pids[1]=$! +chk_msk_nr 2 "after MPC handshake " +chk_msk_remote_key_nr 2 "....chk remote_key" +chk_msk_fallback_nr 0 "....chk no fallback" +flush_pids + + +echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null & +pids[0]=$! +sleep 0.1 +echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null & +pids[1]=$! +sleep 0.1 +chk_msk_fallback_nr 1 "check fallback" +flush_pids + +NR_CLIENTS=100 +for I in `seq 1 $NR_CLIENTS`; do + echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null & + pids[$((I*2))]=$! +done +sleep 0.1 + +for I in `seq 1 $NR_CLIENTS`; do + echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null & + pids[$((I*2 + 1))]=$! +done +sleep 1.5 + +chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present" +flush_pids + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index cedee5b952ba..cad6f73a5fd0 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -11,6 +11,7 @@ #include <stdio.h> #include <stdlib.h> #include <strings.h> +#include <signal.h> #include <unistd.h> #include <sys/poll.h> @@ -36,6 +37,7 @@ extern int optind; static int poll_timeout = 10 * 1000; static bool listen_mode; +static bool quit; enum cfg_mode { CFG_MODE_POLL, @@ -52,11 +54,12 @@ static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static int cfg_wait; static void die_usage(void) { fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" - "[-l] connect_address\n"); + "[-l] [-w sec] connect_address\n"); fprintf(stderr, "\t-6 use ipv6\n"); fprintf(stderr, "\t-t num -- set poll timeout to num\n"); fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); @@ -65,9 +68,15 @@ static void die_usage(void) fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); + fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); } +static void handle_signal(int nr) +{ + quit = true; +} + static const char *getxinfo_strerr(int err) { if (err == EAI_SYSTEM) @@ -418,8 +427,8 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join) - usleep(400000); + if (cfg_wait) + usleep(cfg_wait); close(peerfd); return 0; @@ -812,11 +821,12 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) { + while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; break; case 'l': listen_mode = true; @@ -850,6 +860,9 @@ static void parse_opts(int argc, char **argv) case 'R': cfg_rcvbuf = parse_int(optarg); break; + case 'w': + cfg_wait = atoi(optarg)*1000000; + break; } } @@ -865,6 +878,7 @@ int main(int argc, char *argv[]) { init_rng(); + signal(SIGUSR1, handle_signal); parse_opts(argc, argv); if (tcpulp_audit) diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index acf02e156d20..c0589e071f20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="S:R:d:e:l:r:h4cm:" +optstring="S:R:d:e:l:r:h4cm:f:t" ret=0 sin="" sout="" @@ -21,6 +21,8 @@ testmode="" sndbuf=0 rcvbuf=0 options_log=true +do_tcp=0 +filesize=0 if [ $tc_loss -eq 100 ];then tc_loss=1% @@ -40,9 +42,11 @@ usage() { echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-f: size of file to transfer in bytes (default random)" echo -e "\t-S: set sndbuf value (default: use kernel default)" echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" + echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)" } while getopts "$optstring" option;do @@ -94,6 +98,12 @@ while getopts "$optstring" option;do "m") testmode="$OPTARG" ;; + "f") + filesize="$OPTARG" + ;; + "t") + do_tcp=$((do_tcp+1)) + ;; "?") usage $0 exit 1 @@ -385,10 +395,14 @@ do_transfer() capuser="-Z $SUDO_USER" fi - local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap" + local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}" + local capopt="-i any -s 65535 -B 32768 ${capuser}" + + ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + local cappid_listener=$! - ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & - local cappid=$! + ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! sleep 1 fi @@ -413,7 +427,8 @@ do_transfer() if $capture; then sleep 1 - kill $cappid + kill ${cappid_listener} + kill ${cappid_connector} fi local duration @@ -449,20 +464,25 @@ make_file() { local name=$1 local who=$2 + local SIZE=$filesize + local ksize + local rem - local SIZE TSIZE - SIZE=$((RANDOM % (1024 * 8))) - TSIZE=$((SIZE * 1024)) + if [ $SIZE -eq 0 ]; then + local MAXSIZE=$((1024 * 1024 * 8)) + local MINSIZE=$((1024 * 256)) + + SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE)) + fi - dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + ksize=$((SIZE / 1024)) + rem=$((SIZE - (ksize * 1024))) - SIZE=$((RANDOM % 1024)) - SIZE=$((SIZE + 128)) - TSIZE=$((TSIZE + SIZE)) - dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null + dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null + dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" - echo "Created $name (size $TSIZE) containing data sent by $who" + echo "Created $name (size $(du -b "$name")) containing data sent by $who" } run_tests_lo() @@ -497,9 +517,11 @@ run_tests_lo() return 1 fi - # don't bother testing fallback tcp except for loopback case. - if [ ${listener_ns} != ${connector_ns} ]; then - return 0 + if [ $do_tcp -eq 0 ]; then + # don't bother testing fallback tcp except for loopback case. + if [ ${listener_ns} != ${connector_ns} ]; then + return 0 + fi fi do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr} @@ -516,6 +538,15 @@ run_tests_lo() return 1 fi + if [ $do_tcp -gt 1 ] ;then + do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr} + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + return 1 + fi + fi + return 0 } diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c index 422e7761254d..221fdece47d4 100644 --- a/tools/testing/selftests/net/rxtimestamp.c +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -44,6 +44,7 @@ struct test_case { struct options sockopt; struct tstamps expected; bool enabled; + bool warn_on_fail; }; struct sof_flag { @@ -67,44 +68,44 @@ static struct socket_type socket_types[] = { static struct test_case test_cases[] = { { {}, {} }, { - { so_timestamp: 1 }, - { tstamp: true } + { .so_timestamp = 1 }, + { .tstamp = true } }, { - { so_timestampns: 1 }, - { tstampns: true } + { .so_timestampns = 1 }, + { .tstampns = true } }, { - { so_timestamp: 1, so_timestampns: 1 }, - { tstampns: true } + { .so_timestamp = 1, .so_timestampns = 1 }, + { .tstampns = true } }, { - { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE }, + { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE }, {} }, { /* Loopback device does not support hw timestamps. */ - { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE }, + { .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE }, {} }, { - { so_timestamping: SOF_TIMESTAMPING_SOFTWARE }, - {} + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE }, + .warn_on_fail = true }, { - { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE + { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE | SOF_TIMESTAMPING_RX_HARDWARE }, {} }, { - { so_timestamping: SOF_TIMESTAMPING_SOFTWARE + { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, - { swtstamp: true } + { .swtstamp = true } }, { - { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE + { .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_RX_SOFTWARE }, - { tstamp: true, swtstamp: true } + { .tstamp = true, .swtstamp = true } }, }; @@ -115,6 +116,9 @@ static struct option long_options[] = { { "tcp", no_argument, 0, 't' }, { "udp", no_argument, 0, 'u' }, { "ip", no_argument, 0, 'i' }, + { "strict", no_argument, 0, 'S' }, + { "ipv4", no_argument, 0, '4' }, + { "ipv6", no_argument, 0, '6' }, { NULL, 0, NULL, 0 }, }; @@ -270,37 +274,55 @@ void config_so_flags(int rcv, struct options o) error(1, errno, "Failed to set SO_TIMESTAMPING"); } -bool run_test_case(struct socket_type s, struct test_case t) +bool run_test_case(struct socket_type *s, int test_num, char ip_version, + bool strict) { - int port = (s.type == SOCK_RAW) ? 0 : next_port++; + union { + struct sockaddr_in6 addr6; + struct sockaddr_in addr4; + struct sockaddr addr_un; + } addr; int read_size = op_size; - struct sockaddr_in addr; + int src, dst, rcv, port; + socklen_t addr_size; bool failed = false; - int src, dst, rcv; - src = socket(AF_INET, s.type, s.protocol); + port = (s->type == SOCK_RAW) ? 0 : next_port++; + memset(&addr, 0, sizeof(addr)); + if (ip_version == '4') { + addr.addr4.sin_family = AF_INET; + addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.addr4.sin_port = htons(port); + addr_size = sizeof(addr.addr4); + if (s->type == SOCK_RAW) + read_size += 20; /* for IPv4 header */ + } else { + addr.addr6.sin6_family = AF_INET6; + addr.addr6.sin6_addr = in6addr_loopback; + addr.addr6.sin6_port = htons(port); + addr_size = sizeof(addr.addr6); + } + printf("Starting testcase %d over ipv%c...\n", test_num, ip_version); + src = socket(addr.addr_un.sa_family, s->type, + s->protocol); if (src < 0) error(1, errno, "Failed to open src socket"); - dst = socket(AF_INET, s.type, s.protocol); + dst = socket(addr.addr_un.sa_family, s->type, + s->protocol); if (dst < 0) error(1, errno, "Failed to open dst socket"); - memset(&addr, 0, sizeof(addr)); - addr.sin_family = AF_INET; - addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); - addr.sin_port = htons(port); - - if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0) + if (bind(dst, &addr.addr_un, addr_size) < 0) error(1, errno, "Failed to bind to port %d", port); - if (s.type == SOCK_STREAM && (listen(dst, 1) < 0)) + if (s->type == SOCK_STREAM && (listen(dst, 1) < 0)) error(1, errno, "Failed to listen"); - if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0) + if (connect(src, &addr.addr_un, addr_size) < 0) error(1, errno, "Failed to connect"); - if (s.type == SOCK_STREAM) { + if (s->type == SOCK_STREAM) { rcv = accept(dst, NULL, NULL); if (rcv < 0) error(1, errno, "Failed to accept"); @@ -309,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t) rcv = dst; } - config_so_flags(rcv, t.sockopt); + config_so_flags(rcv, test_cases[test_num].sockopt); usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */ do_send(src); - if (s.type == SOCK_RAW) - read_size += 20; /* for IP header */ - failed = do_recv(rcv, read_size, t.expected); + failed = do_recv(rcv, read_size, test_cases[test_num].expected); close(rcv); close(src); + if (failed) { + printf("FAILURE in testcase %d over ipv%c ", test_num, + ip_version); + print_test_case(&test_cases[test_num]); + if (!strict && test_cases[test_num].warn_on_fail) + failed = false; + } return failed; } @@ -327,6 +354,9 @@ int main(int argc, char **argv) { bool all_protocols = true; bool all_tests = true; + bool cfg_ipv4 = false; + bool cfg_ipv6 = false; + bool strict = false; int arg_index = 0; int failures = 0; int s, t; @@ -363,6 +393,15 @@ int main(int argc, char **argv) all_protocols = false; socket_types[0].enabled = true; break; + case 'S': + strict = true; + break; + case '4': + cfg_ipv4 = true; + break; + case '6': + cfg_ipv6 = true; + break; default: error(1, 0, "Failed to parse parameters."); } @@ -376,13 +415,14 @@ int main(int argc, char **argv) for (t = 0; t < ARRAY_SIZE(test_cases); t++) { if (!all_tests && !test_cases[t].enabled) continue; - - printf("Starting testcase %d...\n", t); - if (run_test_case(socket_types[s], test_cases[t])) { - failures++; - printf("FAILURE in test case "); - print_test_case(&test_cases[t]); - } + if (cfg_ipv4 || !cfg_ipv6) + if (run_test_case(&socket_types[s], t, '4', + strict)) + failures++; + if (cfg_ipv6 || !cfg_ipv4) + if (run_test_case(&socket_types[s], t, '6', + strict)) + failures++; } } if (!failures) diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh new file mode 100755 index 000000000000..91631e88bf46 --- /dev/null +++ b/tools/testing/selftests/net/rxtimestamp.sh @@ -0,0 +1,4 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +./in_netns.sh ./rxtimestamp $@ diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh new file mode 100755 index 000000000000..5274f4a1fba1 --- /dev/null +++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh @@ -0,0 +1,390 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is designed for testing the new VRF strict_mode functionality. + +ret=0 + +# identifies the "init" network namespace which is often called root network +# namespace. +INIT_NETNS_NAME="init" + +PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + nsuccess=$((nsuccess+1)) + printf "\n TEST: %-60s [ OK ]\n" "${msg}" + else + ret=1 + nfail=$((nfail+1)) + printf "\n TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +print_log_test_results() +{ + if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} + fi +} + +log_section() +{ + echo + echo "################################################################################" + echo "TEST SECTION: $*" + echo "################################################################################" +} + +ip_expand_args() +{ + local nsname=$1 + local nsarg="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + nsarg="-netns ${nsname}" + fi + + echo "${nsarg}" +} + +vrf_count() +{ + local nsname=$1 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} -o link show type vrf | wc -l +} + +count_vrf_by_table_id() +{ + local nsname=$1 + local tableid=$2 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l +} + +add_vrf() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null +} + +add_vrf_and_check() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local cnt + local rc + + add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$? + + cnt=$(count_vrf_by_table_id ${nsname} ${vrftable}) + + log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}" +} + +add_vrf_and_check_fail() +{ + local nsname=$1 + local vrfname=$2 + local vrftable=$3 + local cnt + local rc + + add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$? + + cnt=$(count_vrf_by_table_id ${nsname} ${vrftable}) + + log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}" +} + +del_vrf_and_check() +{ + local nsname=$1 + local vrfname=$2 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link del ${vrfname} + log_test $? 0 "${nsname}: remove vrf ${vrfname}" +} + +config_vrf_and_check() +{ + local nsname=$1 + local addr=$2 + local vrfname=$3 + local nsarg="$(ip_expand_args ${nsname})" + + ip ${nsarg} link set dev ${vrfname} up && \ + ip ${nsarg} addr add ${addr} dev ${vrfname} + log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}" +} + +read_strict_mode() +{ + local nsname=$1 + local rval + local rc=0 + local nsexec="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + # a custom network namespace is provided + nsexec="ip netns exec ${nsname}" + fi + + rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \ + grep -E "^[0-1]$")" &> /dev/null + if [ $? -ne 0 ]; then + # set errors + rval=255 + rc=1 + fi + + # on success, rval can be only 0 or 1; on error, rval is equal to 255 + echo ${rval} + return ${rc} +} + +read_strict_mode_compare_and_check() +{ + local nsname=$1 + local expected=$2 + local res + + res="$(read_strict_mode ${nsname})" + log_test ${res} ${expected} "${nsname}: check strict_mode=${res}" +} + +set_strict_mode() +{ + local nsname=$1 + local val=$2 + local nsexec="" + + if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then + # a custom network namespace is provided + nsexec="ip netns exec ${nsname}" + fi + + ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null +} + +enable_strict_mode() +{ + local nsname=$1 + + set_strict_mode ${nsname} 1 +} + +disable_strict_mode() +{ + local nsname=$1 + + set_strict_mode ${nsname} 0 +} + +disable_strict_mode_and_check() +{ + local nsname=$1 + + disable_strict_mode ${nsname} + log_test $? 0 "${nsname}: disable strict_mode (=0)" +} + +enable_strict_mode_and_check() +{ + local nsname=$1 + + enable_strict_mode ${nsname} + log_test $? 0 "${nsname}: enable strict_mode (=1)" +} + +enable_strict_mode_and_check_fail() +{ + local nsname=$1 + + enable_strict_mode ${nsname} + log_test $? 1 "${nsname}: CANNOT enable strict_mode" +} + +strict_mode_check_default() +{ + local nsname=$1 + local strictmode + local vrfcnt + + vrfcnt=$(vrf_count ${nsname}) + strictmode=$(read_strict_mode ${nsname}) + log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs" +} + +setup() +{ + modprobe vrf + + ip netns add testns + ip netns exec testns ip link set lo up +} + +cleanup() +{ + ip netns del testns 2>/dev/null + + ip link del vrf100 2>/dev/null + ip link del vrf101 2>/dev/null + ip link del vrf102 2>/dev/null + + echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null +} + +vrf_strict_mode_tests_init() +{ + vrf_strict_mode_check_support init + + strict_mode_check_default init + + add_vrf_and_check init vrf100 100 + config_vrf_and_check init 172.16.100.1/24 vrf100 + + enable_strict_mode_and_check init + + add_vrf_and_check_fail init vrf101 100 + + disable_strict_mode_and_check init + + add_vrf_and_check init vrf101 100 + config_vrf_and_check init 172.16.101.1/24 vrf101 + + enable_strict_mode_and_check_fail init + + del_vrf_and_check init vrf101 + + enable_strict_mode_and_check init + + add_vrf_and_check init vrf102 102 + config_vrf_and_check init 172.16.102.1/24 vrf102 + + # the strict_modle is enabled in the init +} + +vrf_strict_mode_tests_testns() +{ + vrf_strict_mode_check_support testns + + strict_mode_check_default testns + + enable_strict_mode_and_check testns + + add_vrf_and_check testns vrf100 100 + config_vrf_and_check testns 10.0.100.1/24 vrf100 + + add_vrf_and_check_fail testns vrf101 100 + + add_vrf_and_check_fail testns vrf102 100 + + add_vrf_and_check testns vrf200 200 + + disable_strict_mode_and_check testns + + add_vrf_and_check testns vrf101 100 + + add_vrf_and_check testns vrf102 100 + + #the strict_mode is disabled in the testns +} + +vrf_strict_mode_tests_mix() +{ + read_strict_mode_compare_and_check init 1 + + read_strict_mode_compare_and_check testns 0 + + del_vrf_and_check testns vrf101 + + del_vrf_and_check testns vrf102 + + disable_strict_mode_and_check init + + enable_strict_mode_and_check testns + + enable_strict_mode_and_check init + enable_strict_mode_and_check init + + disable_strict_mode_and_check testns + disable_strict_mode_and_check testns + + read_strict_mode_compare_and_check init 1 + + read_strict_mode_compare_and_check testns 0 +} + +vrf_strict_mode_tests() +{ + log_section "VRF strict_mode test on init network namespace" + vrf_strict_mode_tests_init + + log_section "VRF strict_mode test on testns network namespace" + vrf_strict_mode_tests_testns + + log_section "VRF strict_mode test mixing init and testns network namespaces" + vrf_strict_mode_tests_mix +} + +vrf_strict_mode_check_support() +{ + local nsname=$1 + local output + local rc + + output="$(lsmod | grep '^vrf' | awk '{print $1}')" + if [ -z "${output}" ]; then + modinfo vrf || return $? + fi + + # we do not care about the value of the strict_mode; we only check if + # the strict_mode parameter is available or not. + read_strict_mode ${nsname} &>/dev/null; rc=$? + log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available" + + return ${rc} +} + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit 0 +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit 0 +fi + +cleanup &> /dev/null + +setup +vrf_strict_mode_tests +cleanup + +print_log_test_results + +exit $ret |