summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/bpf/bpftool/.gitignore5
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-btf.rst5
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-link.rst13
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-map.rst8
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-prog.rst11
-rw-r--r--tools/bpf/bpftool/Makefile66
-rw-r--r--tools/bpf/bpftool/btf.c6
-rw-r--r--tools/bpf/bpftool/common.c344
-rw-r--r--tools/bpf/bpftool/feature.c4
-rw-r--r--tools/bpf/bpftool/link.c11
-rw-r--r--tools/bpf/bpftool/main.c12
-rw-r--r--tools/bpf/bpftool/main.h125
-rw-r--r--tools/bpf/bpftool/map.c167
-rw-r--r--tools/bpf/bpftool/pids.c231
-rw-r--r--tools/bpf/bpftool/prog.c193
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.bpf.c80
-rw-r--r--tools/bpf/bpftool/skeleton/pid_iter.h12
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.bpf.c7
-rw-r--r--tools/bpf/bpftool/skeleton/profiler.h46
-rw-r--r--tools/build/feature/Makefile4
-rw-r--r--tools/build/feature/test-clang-bpf-co-re.c9
-rw-r--r--tools/build/feature/test-clang-bpf-global-var.c4
-rw-r--r--tools/include/uapi/linux/bpf.h271
-rw-r--r--tools/lib/bpf/bpf_core_read.h8
-rw-r--r--tools/lib/bpf/bpf_endian.h43
-rw-r--r--tools/lib/bpf/bpf_helpers.h1
-rw-r--r--tools/lib/bpf/btf.h5
-rw-r--r--tools/lib/bpf/libbpf.c663
-rw-r--r--tools/lib/bpf/libbpf.h32
-rw-r--r--tools/lib/bpf/libbpf.map19
-rw-r--r--tools/testing/selftests/bpf/Makefile9
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c157
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoload.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/endian.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/varlen.c68
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c20
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c37
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c234
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c250
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c71
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c79
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h51
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c27
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c686
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoload.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_endian.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c158
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c16
-rw-r--r--tools/testing/selftests/bpf/test_progs.c59
-rw-r--r--tools/testing/selftests/bpf/test_progs.h4
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c62
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr_mixing.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c38
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh786
-rw-r--r--tools/testing/selftests/net/Makefile2
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py277
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh102
-rw-r--r--tools/testing/selftests/net/forwarding/ethtool_lib.sh17
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample3
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh198
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh492
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh121
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c22
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh65
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c122
-rwxr-xr-xtools/testing/selftests/net/rxtimestamp.sh4
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh390
87 files changed, 6618 insertions, 1069 deletions
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 26cde83e1ca3..3e601bcfd461 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
*.d
-/_bpftool
+/bpftool-bootstrap
/bpftool
bpftool*.8
bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
-profiler.skel.h
+/*.skel.h
+/vmlinux.h
diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index ce3a724f50c1..896f4c6c2870 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -36,6 +36,11 @@ DESCRIPTION
otherwise list all BTF objects currently loaded on the
system.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BTF
+ objects. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool btf dump** *BTF_SRC*
Dump BTF entries from a given *BTF_SRC*.
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index 0e43d7b06c11..38b0949a185b 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -37,6 +37,11 @@ DESCRIPTION
zero or more named attributes, some of which depend on type
of link.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ links. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool link pin** *LINK* *FILE*
Pin link *LINK* as *FILE*.
@@ -82,6 +87,7 @@ EXAMPLES
10: cgroup prog 25
cgroup_id 614 attach_type egress
+ pids test_progs(223)
**# bpftool --json --pretty link show**
@@ -91,7 +97,12 @@ EXAMPLES
"type": "cgroup",
"prog_id": 25,
"cgroup_id": 614,
- "attach_type": "egress"
+ "attach_type": "egress",
+ "pids": [{
+ "pid": 223,
+ "comm": "test_progs"
+ }
+ ]
}
]
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 70c78faa47ab..41e2a74252d0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -62,6 +62,11 @@ DESCRIPTION
Output will start with map ID followed by map type and
zero or more named attributes (depending on kernel version).
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ maps. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool map create** *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* **entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
Create a new map with given parameters and pin it to *bpffs*
as *FILE*.
@@ -180,7 +185,8 @@ EXAMPLES
::
10: hash name some_map flags 0x0
- key 4B value 8B max_entries 2048 memlock 167936B
+ key 4B value 8B max_entries 2048 memlock 167936B
+ pids systemd(1)
The following three commands are equivalent:
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 2b254959d488..412ea3d9bf7f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -75,6 +75,11 @@ DESCRIPTION
program run. Activation or deactivation of the feature is
performed via the **kernel.bpf_stats_enabled** sysctl knob.
+ Since Linux 5.8 bpftool is able to discover information about
+ processes that hold open file descriptors (FDs) against BPF
+ programs. On such kernels bpftool will automatically emit this
+ information as well.
+
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }]
Dump eBPF instructions of the programs from the kernel. By
default, eBPF will be disassembled and printed to standard
@@ -243,6 +248,7 @@ EXAMPLES
10: xdp name some_prog tag 005a3d2123620c8b gpl run_time_ns 81632 run_cnt 10
loaded_at 2017-09-29T20:11:00+0000 uid 0
xlated 528B jited 370B memlock 4096B map_ids 10
+ pids systemd(1)
**# bpftool --json --pretty prog show**
@@ -262,6 +268,11 @@ EXAMPLES
"bytes_jited": 370,
"bytes_memlock": 4096,
"map_ids": [10
+ ],
+ "pids": [{
+ "pid": 1,
+ "comm": "systemd"
+ }
]
}
]
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 9e85f101be85..51bd520ed437 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -40,8 +40,9 @@ bash_compdir ?= /usr/share/bash-completion/completions
CFLAGS += -O2
CFLAGS += -W -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
-CFLAGS += $(filter-out -Wswitch-enum,$(EXTRA_WARNINGS))
+CFLAGS += $(filter-out -Wswitch-enum -Wnested-externs,$(EXTRA_WARNINGS))
CFLAGS += -DPACKAGE='"bpftool"' -D__EXPORTED_HEADERS__ \
+ -I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/kernel/bpf/ \
-I$(srctree)/tools/include \
-I$(srctree)/tools/include/uapi \
@@ -61,9 +62,9 @@ CLANG ?= clang
FEATURE_USER = .bpftool
FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
- clang-bpf-global-var
+ clang-bpf-co-re
FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
- clang-bpf-global-var
+ clang-bpf-co-re
check_feat := 1
NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -116,40 +117,60 @@ CFLAGS += -DHAVE_LIBBFD_SUPPORT
SRCS += $(BFD_SRCS)
endif
+BPFTOOL_BOOTSTRAP := $(if $(OUTPUT),$(OUTPUT)bpftool-bootstrap,./bpftool-bootstrap)
+
+BOOTSTRAP_OBJS = $(addprefix $(OUTPUT),main.o common.o json_writer.o gen.o btf.o)
OBJS = $(patsubst %.c,$(OUTPUT)%.o,$(SRCS)) $(OUTPUT)disasm.o
-_OBJS = $(filter-out $(OUTPUT)prog.o,$(OBJS)) $(OUTPUT)_prog.o
-ifeq ($(feature-clang-bpf-global-var),1)
- __OBJS = $(OBJS)
-else
- __OBJS = $(_OBJS)
-endif
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-$(OUTPUT)_prog.o: prog.c
- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+ifneq ($(VMLINUX_BTF)$(VMLINUX_H),)
+ifeq ($(feature-clang-bpf-co-re),1)
-$(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
+BUILD_BPF_SKELS := 1
-skeleton/profiler.bpf.o: skeleton/profiler.bpf.c $(LIBBPF)
+$(OUTPUT)vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL_BOOTSTRAP)
+ifeq ($(VMLINUX_H),)
+ $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) btf dump file $< format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(OUTPUT)%.bpf.o: skeleton/%.bpf.c $(OUTPUT)vmlinux.h $(LIBBPF)
$(QUIET_CLANG)$(CLANG) \
+ -I$(if $(OUTPUT),$(OUTPUT),.) \
-I$(srctree)/tools/include/uapi/ \
- -I$(LIBBPF_PATH) -I$(srctree)/tools/lib \
+ -I$(LIBBPF_PATH) \
+ -I$(srctree)/tools/lib \
-g -O2 -target bpf -c $< -o $@
-profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
- $(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
+$(OUTPUT)%.skel.h: $(OUTPUT)%.bpf.o $(BPFTOOL_BOOTSTRAP)
+ $(QUIET_GEN)$(BPFTOOL_BOOTSTRAP) gen skeleton $< > $@
-$(OUTPUT)prog.o: prog.c profiler.skel.h
- $(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
+$(OUTPUT)prog.o: $(OUTPUT)profiler.skel.h
+
+$(OUTPUT)pids.o: $(OUTPUT)pid_iter.skel.h
+
+endif
+endif
+
+CFLAGS += $(if $(BUILD_BPF_SKELS),,-DBPFTOOL_WITHOUT_SKELETONS)
$(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
$(OUTPUT)feature.o: | zdep
-$(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
+$(BPFTOOL_BOOTSTRAP): $(BOOTSTRAP_OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(BOOTSTRAP_OBJS) $(LIBS)
+
+$(OUTPUT)bpftool: $(OBJS) $(LIBBPF)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(OBJS) $(LIBS)
$(OUTPUT)%.o: %.c
$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
@@ -157,7 +178,7 @@ $(OUTPUT)%.o: %.c
clean: $(LIBBPF)-clean
$(call QUIET_CLEAN, bpftool)
$(Q)$(RM) -- $(OUTPUT)bpftool $(OUTPUT)*.o $(OUTPUT)*.d
- $(Q)$(RM) -- $(OUTPUT)_bpftool profiler.skel.h skeleton/profiler.bpf.o
+ $(Q)$(RM) -- $(BPFTOOL_BOOTSTRAP) $(OUTPUT)*.skel.h $(OUTPUT)vmlinux.h
$(Q)$(RM) -r -- $(OUTPUT)libbpf/
$(call QUIET_CLEAN, core-gen)
$(Q)$(RM) -- $(OUTPUT)FEATURE-DUMP.bpftool
@@ -192,6 +213,7 @@ FORCE:
zdep:
@if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi
+.SECONDARY:
.PHONY: all FORCE clean install uninstall zdep
.PHONY: doc doc-clean doc-install doc-uninstall
.DEFAULT_GOAL := all
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index faac8189b285..fc9bc7a23db6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -809,6 +809,7 @@ show_btf_plain(struct bpf_btf_info *info, int fd,
printf("%s%u", n++ == 0 ? " map_ids " : ",",
obj->obj_id);
}
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
}
@@ -841,6 +842,9 @@ show_btf_json(struct bpf_btf_info *info, int fd,
jsonw_uint(json_wtr, obj->obj_id);
}
jsonw_end_array(json_wtr); /* map_ids */
+
+ emit_obj_refs_json(&refs_table, info->id, json_wtr); /* pids */
+
jsonw_end_object(json_wtr); /* btf object */
}
@@ -893,6 +897,7 @@ static int do_show(int argc, char **argv)
close(fd);
return err;
}
+ build_obj_refs_table(&refs_table, BPF_OBJ_BTF);
if (fd >= 0) {
err = show_btf(fd, &btf_prog_table, &btf_map_table);
@@ -939,6 +944,7 @@ static int do_show(int argc, char **argv)
exit_free:
delete_btf_table(&btf_prog_table);
delete_btf_table(&btf_map_table);
+ delete_obj_refs_table(&refs_table);
return err;
}
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index c47bdc65de8e..18e5604fe260 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -29,6 +29,42 @@
#define BPF_FS_MAGIC 0xcafe4a11
#endif
+const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+ [BPF_CGROUP_INET_INGRESS] = "ingress",
+ [BPF_CGROUP_INET_EGRESS] = "egress",
+ [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+ [BPF_CGROUP_SOCK_OPS] = "sock_ops",
+ [BPF_CGROUP_DEVICE] = "device",
+ [BPF_CGROUP_INET4_BIND] = "bind4",
+ [BPF_CGROUP_INET6_BIND] = "bind6",
+ [BPF_CGROUP_INET4_CONNECT] = "connect4",
+ [BPF_CGROUP_INET6_CONNECT] = "connect6",
+ [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+ [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+ [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
+ [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
+ [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
+ [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
+ [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+ [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+ [BPF_CGROUP_SYSCTL] = "sysctl",
+ [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
+ [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+ [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+ [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
+
+ [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+ [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+ [BPF_LIRC_MODE2] = "lirc_mode2",
+ [BPF_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_TRACE_RAW_TP] = "raw_tp",
+ [BPF_TRACE_FENTRY] = "fentry",
+ [BPF_TRACE_FEXIT] = "fexit",
+ [BPF_MODIFY_RETURN] = "mod_ret",
+ [BPF_LSM_MAC] = "lsm_mac",
+};
+
void p_err(const char *fmt, ...)
{
va_list ap;
@@ -581,3 +617,311 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
{
return vfprintf(stderr, format, args);
}
+
+static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
+{
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_prog_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_prog_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get prog by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get prog info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
+ (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+
+int prog_parse_fds(int *argc, char ***argv, int **fds)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ (*fds)[0] = bpf_prog_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
+ p_err("get by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "tag")) {
+ unsigned char tag[BPF_TAG_SIZE];
+
+ NEXT_ARGP();
+
+ if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
+ tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
+ != BPF_TAG_SIZE) {
+ p_err("can't parse tag");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(tag, fds, true);
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return prog_fd_by_nametag(name, fds, false);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
+ }
+
+ p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int prog_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = prog_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several programs match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
+static int map_fd_by_name(char *name, int **fds)
+{
+ unsigned int id = 0;
+ int fd, nb_fds = 0;
+ void *tmp;
+ int err;
+
+ while (true) {
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno != ENOENT) {
+ p_err("%s", strerror(errno));
+ goto err_close_fds;
+ }
+ return nb_fds;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ p_err("can't get map by id (%u): %s",
+ id, strerror(errno));
+ goto err_close_fds;
+ }
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ p_err("can't get map info (%u): %s",
+ id, strerror(errno));
+ goto err_close_fd;
+ }
+
+ if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
+ close(fd);
+ continue;
+ }
+
+ if (nb_fds > 0) {
+ tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
+ if (!tmp) {
+ p_err("failed to realloc");
+ goto err_close_fd;
+ }
+ *fds = tmp;
+ }
+ (*fds)[nb_fds++] = fd;
+ }
+
+err_close_fd:
+ close(fd);
+err_close_fds:
+ while (--nb_fds >= 0)
+ close((*fds)[nb_fds]);
+ return -1;
+}
+
+int map_parse_fds(int *argc, char ***argv, int **fds)
+{
+ if (is_prefix(**argv, "id")) {
+ unsigned int id;
+ char *endptr;
+
+ NEXT_ARGP();
+
+ id = strtoul(**argv, &endptr, 0);
+ if (*endptr) {
+ p_err("can't parse %s as ID", **argv);
+ return -1;
+ }
+ NEXT_ARGP();
+
+ (*fds)[0] = bpf_map_get_fd_by_id(id);
+ if ((*fds)[0] < 0) {
+ p_err("get map by id (%u): %s", id, strerror(errno));
+ return -1;
+ }
+ return 1;
+ } else if (is_prefix(**argv, "name")) {
+ char *name;
+
+ NEXT_ARGP();
+
+ name = **argv;
+ if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+ p_err("can't parse name");
+ return -1;
+ }
+ NEXT_ARGP();
+
+ return map_fd_by_name(name, fds);
+ } else if (is_prefix(**argv, "pinned")) {
+ char *path;
+
+ NEXT_ARGP();
+
+ path = **argv;
+ NEXT_ARGP();
+
+ (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
+ if ((*fds)[0] < 0)
+ return -1;
+ return 1;
+ }
+
+ p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
+ return -1;
+}
+
+int map_parse_fd(int *argc, char ***argv)
+{
+ int *fds = NULL;
+ int nb_fds, fd;
+
+ fds = malloc(sizeof(int));
+ if (!fds) {
+ p_err("mem alloc failed");
+ return -1;
+ }
+ nb_fds = map_parse_fds(argc, argv, &fds);
+ if (nb_fds != 1) {
+ if (nb_fds > 1) {
+ p_err("several maps match this handle");
+ while (nb_fds--)
+ close(fds[nb_fds]);
+ }
+ fd = -1;
+ goto exit_free;
+ }
+
+ fd = fds[0];
+exit_free:
+ free(fds);
+ return fd;
+}
+
+int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
+{
+ int err;
+ int fd;
+
+ fd = map_parse_fd(argc, argv);
+ if (fd < 0)
+ return -1;
+
+ err = bpf_obj_get_info_by_fd(fd, info, info_len);
+ if (err) {
+ p_err("can't get map info: %s", strerror(errno));
+ close(fd);
+ return err;
+ }
+
+ return fd;
+}
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 768bf77df886..1cd75807673e 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -695,7 +695,7 @@ section_program_types(bool *supported_types, const char *define_prefix,
"/*** eBPF program types ***/",
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
probe_prog_type(i, supported_types, define_prefix, ifindex);
print_end_section();
@@ -741,7 +741,7 @@ section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
" %sBPF__PROG_TYPE_ ## prog_type ## __HELPER_ ## helper\n",
define_prefix, define_prefix, define_prefix,
define_prefix);
- for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
+ for (i = BPF_PROG_TYPE_UNSPEC + 1; i < prog_type_name_size; i++)
probe_helpers_for_progtype(i, supported_types[i], define_prefix,
ifindex);
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index fca57ee8fafe..326b8fdf0243 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -108,7 +108,7 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < ARRAY_SIZE(prog_type_name))
+ if (prog_info.type < prog_type_name_size)
jsonw_string_field(json_wtr, "prog_type",
prog_type_name[prog_info.type]);
else
@@ -143,6 +143,9 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
}
jsonw_end_array(json_wtr);
}
+
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
return 0;
@@ -184,7 +187,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
if (err)
return err;
- if (prog_info.type < ARRAY_SIZE(prog_type_name))
+ if (prog_info.type < prog_type_name_size)
printf("\n\tprog_type %s ",
prog_type_name[prog_info.type]);
else
@@ -212,6 +215,7 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
printf("\n\tpinned %s", obj->path);
}
}
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
printf("\n");
@@ -257,6 +261,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+ build_obj_refs_table(&refs_table, BPF_OBJ_LINK);
if (argc == 2) {
fd = link_parse_fd(&argc, &argv);
@@ -296,6 +301,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return errno == ENOENT ? 0 : -1;
}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 46bd716a9d86..4a191fcbeb82 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -31,6 +31,7 @@ bool relaxed_maps;
struct pinned_obj_table prog_table;
struct pinned_obj_table map_table;
struct pinned_obj_table link_table;
+struct obj_refs_table refs_table;
static void __noreturn clean_and_exit(int i)
{
@@ -92,9 +93,16 @@ int cmd_select(const struct cmd *cmds, int argc, char **argv,
if (argc < 1 && cmds[0].func)
return cmds[0].func(argc, argv);
- for (i = 0; cmds[i].func; i++)
- if (is_prefix(*argv, cmds[i].cmd))
+ for (i = 0; cmds[i].cmd; i++) {
+ if (is_prefix(*argv, cmds[i].cmd)) {
+ if (!cmds[i].func) {
+ p_err("command '%s' is not supported in bootstrap mode",
+ cmds[i].cmd);
+ return -1;
+ }
return cmds[i].func(argc - 1, argv + 1);
+ }
+ }
help(argc - 1, argv + 1);
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 5cdf0bc049bd..78d34e860713 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -56,82 +56,21 @@
#define HELP_SPEC_LINK \
"LINK := { id LINK_ID | pinned FILE }"
-static const char * const prog_type_name[] = {
- [BPF_PROG_TYPE_UNSPEC] = "unspec",
- [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
- [BPF_PROG_TYPE_KPROBE] = "kprobe",
- [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
- [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
- [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
- [BPF_PROG_TYPE_XDP] = "xdp",
- [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
- [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
- [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
- [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
- [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
- [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
- [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
- [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
- [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
- [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
- [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
- [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
- [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
- [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
- [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
- [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
- [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
- [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
- [BPF_PROG_TYPE_TRACING] = "tracing",
- [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
- [BPF_PROG_TYPE_EXT] = "ext",
-};
+extern const char * const prog_type_name[];
+extern const size_t prog_type_name_size;
-static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
- [BPF_CGROUP_INET_INGRESS] = "ingress",
- [BPF_CGROUP_INET_EGRESS] = "egress",
- [BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
- [BPF_CGROUP_SOCK_OPS] = "sock_ops",
- [BPF_CGROUP_DEVICE] = "device",
- [BPF_CGROUP_INET4_BIND] = "bind4",
- [BPF_CGROUP_INET6_BIND] = "bind6",
- [BPF_CGROUP_INET4_CONNECT] = "connect4",
- [BPF_CGROUP_INET6_CONNECT] = "connect6",
- [BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
- [BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
- [BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
- [BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
- [BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
- [BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
- [BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
- [BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
- [BPF_CGROUP_SYSCTL] = "sysctl",
- [BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
- [BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
- [BPF_CGROUP_GETSOCKOPT] = "getsockopt",
- [BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-
- [BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
- [BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
- [BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
- [BPF_LIRC_MODE2] = "lirc_mode2",
- [BPF_FLOW_DISSECTOR] = "flow_dissector",
- [BPF_TRACE_RAW_TP] = "raw_tp",
- [BPF_TRACE_FENTRY] = "fentry",
- [BPF_TRACE_FEXIT] = "fexit",
- [BPF_MODIFY_RETURN] = "mod_ret",
- [BPF_LSM_MAC] = "lsm_mac",
-};
+extern const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE];
extern const char * const map_type_name[];
extern const size_t map_type_name_size;
+/* keep in sync with the definition in skeleton/pid_iter.bpf.c */
enum bpf_obj_type {
BPF_OBJ_UNKNOWN,
BPF_OBJ_PROG,
BPF_OBJ_MAP,
BPF_OBJ_LINK,
+ BPF_OBJ_BTF,
};
extern const char *bin_name;
@@ -139,12 +78,14 @@ extern const char *bin_name;
extern json_writer_t *json_wtr;
extern bool json_output;
extern bool show_pinned;
+extern bool show_pids;
extern bool block_mount;
extern bool verifier_logs;
extern bool relaxed_maps;
extern struct pinned_obj_table prog_table;
extern struct pinned_obj_table map_table;
extern struct pinned_obj_table link_table;
+extern struct obj_refs_table refs_table;
void __printf(1, 2) p_err(const char *fmt, ...);
void __printf(1, 2) p_info(const char *fmt, ...);
@@ -168,12 +109,35 @@ struct pinned_obj {
struct hlist_node hash;
};
+struct obj_refs_table {
+ DECLARE_HASHTABLE(table, 16);
+};
+
+struct obj_ref {
+ int pid;
+ char comm[16];
+};
+
+struct obj_refs {
+ struct hlist_node node;
+ __u32 id;
+ int ref_cnt;
+ struct obj_ref *refs;
+};
+
struct btf;
struct bpf_line_info;
int build_pinned_obj_table(struct pinned_obj_table *table,
enum bpf_obj_type type);
void delete_pinned_obj_table(struct pinned_obj_table *tab);
+__weak int build_obj_refs_table(struct obj_refs_table *table,
+ enum bpf_obj_type type);
+__weak void delete_obj_refs_table(struct obj_refs_table *table);
+__weak void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+ json_writer_t *json_wtr);
+__weak void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id,
+ const char *prefix);
void print_dev_plain(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
void print_dev_json(__u32 ifindex, __u64 ns_dev, __u64 ns_inode);
@@ -194,23 +158,28 @@ int mount_bpffs_for_pin(const char *name);
int do_pin_any(int argc, char **argv, int (*get_fd_by_id)(int *, char ***));
int do_pin_fd(int fd, const char *name);
-int do_prog(int argc, char **arg);
-int do_map(int argc, char **arg);
-int do_link(int argc, char **arg);
-int do_event_pipe(int argc, char **argv);
-int do_cgroup(int argc, char **arg);
-int do_perf(int argc, char **arg);
-int do_net(int argc, char **arg);
-int do_tracelog(int argc, char **arg);
-int do_feature(int argc, char **argv);
-int do_btf(int argc, char **argv);
+/* commands available in bootstrap mode */
int do_gen(int argc, char **argv);
-int do_struct_ops(int argc, char **argv);
-int do_iter(int argc, char **argv);
+int do_btf(int argc, char **argv);
+
+/* non-bootstrap only commands */
+int do_prog(int argc, char **arg) __weak;
+int do_map(int argc, char **arg) __weak;
+int do_link(int argc, char **arg) __weak;
+int do_event_pipe(int argc, char **argv) __weak;
+int do_cgroup(int argc, char **arg) __weak;
+int do_perf(int argc, char **arg) __weak;
+int do_net(int argc, char **arg) __weak;
+int do_tracelog(int argc, char **arg) __weak;
+int do_feature(int argc, char **argv) __weak;
+int do_struct_ops(int argc, char **argv) __weak;
+int do_iter(int argc, char **argv) __weak;
int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
int prog_parse_fd(int *argc, char ***argv);
+int prog_parse_fds(int *argc, char ***argv, int **fds);
int map_parse_fd(int *argc, char ***argv);
+int map_parse_fds(int *argc, char ***argv, int **fds);
int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len);
struct bpf_prog_linfo;
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 1d3b60651078..3a27d31a1856 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -93,162 +93,6 @@ static void *alloc_value(struct bpf_map_info *info)
return malloc(info->value_size);
}
-static int map_fd_by_name(char *name, int **fds)
-{
- unsigned int id = 0;
- int fd, nb_fds = 0;
- void *tmp;
- int err;
-
- while (true) {
- struct bpf_map_info info = {};
- __u32 len = sizeof(info);
-
- err = bpf_map_get_next_id(id, &id);
- if (err) {
- if (errno != ENOENT) {
- p_err("%s", strerror(errno));
- goto err_close_fds;
- }
- return nb_fds;
- }
-
- fd = bpf_map_get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't get map by id (%u): %s",
- id, strerror(errno));
- goto err_close_fds;
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get map info (%u): %s",
- id, strerror(errno));
- goto err_close_fd;
- }
-
- if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) {
- close(fd);
- continue;
- }
-
- if (nb_fds > 0) {
- tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
- if (!tmp) {
- p_err("failed to realloc");
- goto err_close_fd;
- }
- *fds = tmp;
- }
- (*fds)[nb_fds++] = fd;
- }
-
-err_close_fd:
- close(fd);
-err_close_fds:
- while (--nb_fds >= 0)
- close((*fds)[nb_fds]);
- return -1;
-}
-
-static int map_parse_fds(int *argc, char ***argv, int **fds)
-{
- if (is_prefix(**argv, "id")) {
- unsigned int id;
- char *endptr;
-
- NEXT_ARGP();
-
- id = strtoul(**argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", **argv);
- return -1;
- }
- NEXT_ARGP();
-
- (*fds)[0] = bpf_map_get_fd_by_id(id);
- if ((*fds)[0] < 0) {
- p_err("get map by id (%u): %s", id, strerror(errno));
- return -1;
- }
- return 1;
- } else if (is_prefix(**argv, "name")) {
- char *name;
-
- NEXT_ARGP();
-
- name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
- p_err("can't parse name");
- return -1;
- }
- NEXT_ARGP();
-
- return map_fd_by_name(name, fds);
- } else if (is_prefix(**argv, "pinned")) {
- char *path;
-
- NEXT_ARGP();
-
- path = **argv;
- NEXT_ARGP();
-
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP);
- if ((*fds)[0] < 0)
- return -1;
- return 1;
- }
-
- p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv);
- return -1;
-}
-
-int map_parse_fd(int *argc, char ***argv)
-{
- int *fds = NULL;
- int nb_fds, fd;
-
- fds = malloc(sizeof(int));
- if (!fds) {
- p_err("mem alloc failed");
- return -1;
- }
- nb_fds = map_parse_fds(argc, argv, &fds);
- if (nb_fds != 1) {
- if (nb_fds > 1) {
- p_err("several maps match this handle");
- while (nb_fds--)
- close(fds[nb_fds]);
- }
- fd = -1;
- goto exit_free;
- }
-
- fd = fds[0];
-exit_free:
- free(fds);
- return fd;
-}
-
-int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len)
-{
- int err;
- int fd;
-
- fd = map_parse_fd(argc, argv);
- if (fd < 0)
- return -1;
-
- err = bpf_obj_get_info_by_fd(fd, info, info_len);
- if (err) {
- p_err("can't get map info: %s", strerror(errno));
- close(fd);
- return err;
- }
-
- return fd;
-}
-
static int do_dump_btf(const struct btf_dumper *d,
struct bpf_map_info *map_info, void *key,
void *value)
@@ -629,7 +473,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
- if (prog_type < ARRAY_SIZE(prog_type_name))
+ if (prog_type < prog_type_name_size)
jsonw_string_field(json_wtr, "owner_prog_type",
prog_type_name[prog_type]);
else
@@ -666,6 +510,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info)
jsonw_end_array(json_wtr);
}
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
return 0;
@@ -712,7 +558,7 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (owner_prog_type) {
unsigned int prog_type = atoi(owner_prog_type);
- if (prog_type < ARRAY_SIZE(prog_type_name))
+ if (prog_type < prog_type_name_size)
printf("owner_prog_type %s ",
prog_type_name[prog_type]);
else
@@ -753,6 +599,8 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info)
if (frozen)
printf("%sfrozen", info->btf_id ? " " : "");
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
printf("\n");
return 0;
}
@@ -811,6 +659,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&map_table, BPF_OBJ_MAP);
+ build_obj_refs_table(&refs_table, BPF_OBJ_MAP);
if (argc == 2)
return do_show_subset(argc, argv);
@@ -854,6 +703,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return errno == ENOENT ? 0 : -1;
}
diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c
new file mode 100644
index 000000000000..7d5416667c85
--- /dev/null
+++ b/tools/bpf/bpftool/pids.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+
+#include "main.h"
+#include "skeleton/pid_iter.h"
+
+#ifdef BPFTOOL_WITHOUT_SKELETONS
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+ p_err("bpftool built without PID iterator support");
+ return -ENOTSUP;
+}
+void delete_obj_refs_table(struct obj_refs_table *table) {}
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix) {}
+
+#else /* BPFTOOL_WITHOUT_SKELETONS */
+
+#include "pid_iter.skel.h"
+
+static void add_ref(struct obj_refs_table *table, struct pid_iter_entry *e)
+{
+ struct obj_refs *refs;
+ struct obj_ref *ref;
+ void *tmp;
+ int i;
+
+ hash_for_each_possible(table->table, refs, node, e->id) {
+ if (refs->id != e->id)
+ continue;
+
+ for (i = 0; i < refs->ref_cnt; i++) {
+ if (refs->refs[i].pid == e->pid)
+ return;
+ }
+
+ tmp = realloc(refs->refs, (refs->ref_cnt + 1) * sizeof(*ref));
+ if (!tmp) {
+ p_err("failed to re-alloc memory for ID %u, PID %d, COMM %s...",
+ e->id, e->pid, e->comm);
+ return;
+ }
+ refs->refs = tmp;
+ ref = &refs->refs[refs->ref_cnt];
+ ref->pid = e->pid;
+ memcpy(ref->comm, e->comm, sizeof(ref->comm));
+ refs->ref_cnt++;
+
+ return;
+ }
+
+ /* new ref */
+ refs = calloc(1, sizeof(*refs));
+ if (!refs) {
+ p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+ e->id, e->pid, e->comm);
+ return;
+ }
+
+ refs->id = e->id;
+ refs->refs = malloc(sizeof(*refs->refs));
+ if (!refs->refs) {
+ free(refs);
+ p_err("failed to alloc memory for ID %u, PID %d, COMM %s...",
+ e->id, e->pid, e->comm);
+ return;
+ }
+ ref = &refs->refs[0];
+ ref->pid = e->pid;
+ memcpy(ref->comm, e->comm, sizeof(ref->comm));
+ refs->ref_cnt = 1;
+ hash_add(table->table, &refs->node, e->id);
+}
+
+static int __printf(2, 0)
+libbpf_print_none(__maybe_unused enum libbpf_print_level level,
+ __maybe_unused const char *format,
+ __maybe_unused va_list args)
+{
+ return 0;
+}
+
+int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type)
+{
+ char buf[4096];
+ struct pid_iter_bpf *skel;
+ struct pid_iter_entry *e;
+ int err, ret, fd = -1, i;
+ libbpf_print_fn_t default_print;
+
+ hash_init(table->table);
+ set_max_rlimit();
+
+ skel = pid_iter_bpf__open();
+ if (!skel) {
+ p_err("failed to open PID iterator skeleton");
+ return -1;
+ }
+
+ skel->rodata->obj_type = type;
+
+ /* we don't want output polluted with libbpf errors if bpf_iter is not
+ * supported
+ */
+ default_print = libbpf_set_print(libbpf_print_none);
+ err = pid_iter_bpf__load(skel);
+ libbpf_set_print(default_print);
+ if (err) {
+ /* too bad, kernel doesn't support BPF iterators yet */
+ err = 0;
+ goto out;
+ }
+ err = pid_iter_bpf__attach(skel);
+ if (err) {
+ /* if we loaded above successfully, attach has to succeed */
+ p_err("failed to attach PID iterator: %d", err);
+ goto out;
+ }
+
+ fd = bpf_iter_create(bpf_link__fd(skel->links.iter));
+ if (fd < 0) {
+ err = -errno;
+ p_err("failed to create PID iterator session: %d", err);
+ goto out;
+ }
+
+ while (true) {
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0) {
+ err = -errno;
+ p_err("failed to read PID iterator output: %d", err);
+ goto out;
+ }
+ if (ret == 0)
+ break;
+ if (ret % sizeof(*e)) {
+ err = -EINVAL;
+ p_err("invalid PID iterator output format");
+ goto out;
+ }
+ ret /= sizeof(*e);
+
+ e = (void *)buf;
+ for (i = 0; i < ret; i++, e++) {
+ add_ref(table, e);
+ }
+ }
+ err = 0;
+out:
+ if (fd >= 0)
+ close(fd);
+ pid_iter_bpf__destroy(skel);
+ return err;
+}
+
+void delete_obj_refs_table(struct obj_refs_table *table)
+{
+ struct obj_refs *refs;
+ struct hlist_node *tmp;
+ unsigned int bkt;
+
+ hash_for_each_safe(table->table, bkt, tmp, refs, node) {
+ hash_del(&refs->node);
+ free(refs->refs);
+ free(refs);
+ }
+}
+
+void emit_obj_refs_json(struct obj_refs_table *table, __u32 id,
+ json_writer_t *json_writer)
+{
+ struct obj_refs *refs;
+ struct obj_ref *ref;
+ int i;
+
+ if (hash_empty(table->table))
+ return;
+
+ hash_for_each_possible(table->table, refs, node, id) {
+ if (refs->id != id)
+ continue;
+ if (refs->ref_cnt == 0)
+ break;
+
+ jsonw_name(json_writer, "pids");
+ jsonw_start_array(json_writer);
+ for (i = 0; i < refs->ref_cnt; i++) {
+ ref = &refs->refs[i];
+ jsonw_start_object(json_writer);
+ jsonw_int_field(json_writer, "pid", ref->pid);
+ jsonw_string_field(json_writer, "comm", ref->comm);
+ jsonw_end_object(json_writer);
+ }
+ jsonw_end_array(json_writer);
+ break;
+ }
+}
+
+void emit_obj_refs_plain(struct obj_refs_table *table, __u32 id, const char *prefix)
+{
+ struct obj_refs *refs;
+ struct obj_ref *ref;
+ int i;
+
+ if (hash_empty(table->table))
+ return;
+
+ hash_for_each_possible(table->table, refs, node, id) {
+ if (refs->id != id)
+ continue;
+ if (refs->ref_cnt == 0)
+ break;
+
+ printf("%s", prefix);
+ for (i = 0; i < refs->ref_cnt; i++) {
+ ref = &refs->refs[i];
+ printf("%s%s(%d)", i == 0 ? "" : ", ", ref->comm, ref->pid);
+ }
+ break;
+ }
+}
+
+
+#endif
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index a5eff83496f2..6863c57effd0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -29,6 +29,40 @@
#include "main.h"
#include "xlated_dumper.h"
+const char * const prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = "unspec",
+ [BPF_PROG_TYPE_SOCKET_FILTER] = "socket_filter",
+ [BPF_PROG_TYPE_KPROBE] = "kprobe",
+ [BPF_PROG_TYPE_SCHED_CLS] = "sched_cls",
+ [BPF_PROG_TYPE_SCHED_ACT] = "sched_act",
+ [BPF_PROG_TYPE_TRACEPOINT] = "tracepoint",
+ [BPF_PROG_TYPE_XDP] = "xdp",
+ [BPF_PROG_TYPE_PERF_EVENT] = "perf_event",
+ [BPF_PROG_TYPE_CGROUP_SKB] = "cgroup_skb",
+ [BPF_PROG_TYPE_CGROUP_SOCK] = "cgroup_sock",
+ [BPF_PROG_TYPE_LWT_IN] = "lwt_in",
+ [BPF_PROG_TYPE_LWT_OUT] = "lwt_out",
+ [BPF_PROG_TYPE_LWT_XMIT] = "lwt_xmit",
+ [BPF_PROG_TYPE_SOCK_OPS] = "sock_ops",
+ [BPF_PROG_TYPE_SK_SKB] = "sk_skb",
+ [BPF_PROG_TYPE_CGROUP_DEVICE] = "cgroup_device",
+ [BPF_PROG_TYPE_SK_MSG] = "sk_msg",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT] = "raw_tracepoint",
+ [BPF_PROG_TYPE_CGROUP_SOCK_ADDR] = "cgroup_sock_addr",
+ [BPF_PROG_TYPE_LWT_SEG6LOCAL] = "lwt_seg6local",
+ [BPF_PROG_TYPE_LIRC_MODE2] = "lirc_mode2",
+ [BPF_PROG_TYPE_SK_REUSEPORT] = "sk_reuseport",
+ [BPF_PROG_TYPE_FLOW_DISSECTOR] = "flow_dissector",
+ [BPF_PROG_TYPE_CGROUP_SYSCTL] = "cgroup_sysctl",
+ [BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE] = "raw_tracepoint_writable",
+ [BPF_PROG_TYPE_CGROUP_SOCKOPT] = "cgroup_sockopt",
+ [BPF_PROG_TYPE_TRACING] = "tracing",
+ [BPF_PROG_TYPE_STRUCT_OPS] = "struct_ops",
+ [BPF_PROG_TYPE_EXT] = "ext",
+};
+
+const size_t prog_type_name_size = ARRAY_SIZE(prog_type_name);
+
enum dump_mode {
DUMP_JITED,
DUMP_XLATED,
@@ -86,158 +120,6 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size)
strftime(buf, size, "%FT%T%z", &load_tm);
}
-static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
-{
- unsigned int id = 0;
- int fd, nb_fds = 0;
- void *tmp;
- int err;
-
- while (true) {
- struct bpf_prog_info info = {};
- __u32 len = sizeof(info);
-
- err = bpf_prog_get_next_id(id, &id);
- if (err) {
- if (errno != ENOENT) {
- p_err("%s", strerror(errno));
- goto err_close_fds;
- }
- return nb_fds;
- }
-
- fd = bpf_prog_get_fd_by_id(id);
- if (fd < 0) {
- p_err("can't get prog by id (%u): %s",
- id, strerror(errno));
- goto err_close_fds;
- }
-
- err = bpf_obj_get_info_by_fd(fd, &info, &len);
- if (err) {
- p_err("can't get prog info (%u): %s",
- id, strerror(errno));
- goto err_close_fd;
- }
-
- if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
- (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
- close(fd);
- continue;
- }
-
- if (nb_fds > 0) {
- tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
- if (!tmp) {
- p_err("failed to realloc");
- goto err_close_fd;
- }
- *fds = tmp;
- }
- (*fds)[nb_fds++] = fd;
- }
-
-err_close_fd:
- close(fd);
-err_close_fds:
- while (--nb_fds >= 0)
- close((*fds)[nb_fds]);
- return -1;
-}
-
-static int prog_parse_fds(int *argc, char ***argv, int **fds)
-{
- if (is_prefix(**argv, "id")) {
- unsigned int id;
- char *endptr;
-
- NEXT_ARGP();
-
- id = strtoul(**argv, &endptr, 0);
- if (*endptr) {
- p_err("can't parse %s as ID", **argv);
- return -1;
- }
- NEXT_ARGP();
-
- (*fds)[0] = bpf_prog_get_fd_by_id(id);
- if ((*fds)[0] < 0) {
- p_err("get by id (%u): %s", id, strerror(errno));
- return -1;
- }
- return 1;
- } else if (is_prefix(**argv, "tag")) {
- unsigned char tag[BPF_TAG_SIZE];
-
- NEXT_ARGP();
-
- if (sscanf(**argv, BPF_TAG_FMT, tag, tag + 1, tag + 2,
- tag + 3, tag + 4, tag + 5, tag + 6, tag + 7)
- != BPF_TAG_SIZE) {
- p_err("can't parse tag");
- return -1;
- }
- NEXT_ARGP();
-
- return prog_fd_by_nametag(tag, fds, true);
- } else if (is_prefix(**argv, "name")) {
- char *name;
-
- NEXT_ARGP();
-
- name = **argv;
- if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
- p_err("can't parse name");
- return -1;
- }
- NEXT_ARGP();
-
- return prog_fd_by_nametag(name, fds, false);
- } else if (is_prefix(**argv, "pinned")) {
- char *path;
-
- NEXT_ARGP();
-
- path = **argv;
- NEXT_ARGP();
-
- (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG);
- if ((*fds)[0] < 0)
- return -1;
- return 1;
- }
-
- p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv);
- return -1;
-}
-
-int prog_parse_fd(int *argc, char ***argv)
-{
- int *fds = NULL;
- int nb_fds, fd;
-
- fds = malloc(sizeof(int));
- if (!fds) {
- p_err("mem alloc failed");
- return -1;
- }
- nb_fds = prog_parse_fds(argc, argv, &fds);
- if (nb_fds != 1) {
- if (nb_fds > 1) {
- p_err("several programs match this handle");
- while (nb_fds--)
- close(fds[nb_fds]);
- }
- fd = -1;
- goto exit_free;
- }
-
- fd = fds[0];
-exit_free:
- free(fds);
- return fd;
-}
-
static void show_prog_maps(int fd, __u32 num_maps)
{
struct bpf_prog_info info = {};
@@ -342,6 +224,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd)
jsonw_end_array(json_wtr);
}
+ emit_obj_refs_json(&refs_table, info->id, json_wtr);
+
jsonw_end_object(json_wtr);
}
@@ -408,6 +292,8 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd)
if (info->btf_id)
printf("\n\tbtf_id %d", info->btf_id);
+ emit_obj_refs_plain(&refs_table, info->id, "\n\tpids ");
+
printf("\n");
}
@@ -473,6 +359,7 @@ static int do_show(int argc, char **argv)
if (show_pinned)
build_pinned_obj_table(&prog_table, BPF_OBJ_PROG);
+ build_obj_refs_table(&refs_table, BPF_OBJ_PROG);
if (argc == 2)
return do_show_subset(argc, argv);
@@ -514,6 +401,8 @@ static int do_show(int argc, char **argv)
if (json_output)
jsonw_end_array(json_wtr);
+ delete_obj_refs_table(&refs_table);
+
return err;
}
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.bpf.c b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
new file mode 100644
index 000000000000..8468a608911e
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.bpf.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+#include "pid_iter.h"
+
+/* keep in sync with the definition in main.h */
+enum bpf_obj_type {
+ BPF_OBJ_UNKNOWN,
+ BPF_OBJ_PROG,
+ BPF_OBJ_MAP,
+ BPF_OBJ_LINK,
+ BPF_OBJ_BTF,
+};
+
+extern const void bpf_link_fops __ksym;
+extern const void bpf_map_fops __ksym;
+extern const void bpf_prog_fops __ksym;
+extern const void btf_fops __ksym;
+
+const volatile enum bpf_obj_type obj_type = BPF_OBJ_UNKNOWN;
+
+static __always_inline __u32 get_obj_id(void *ent, enum bpf_obj_type type)
+{
+ switch (type) {
+ case BPF_OBJ_PROG:
+ return BPF_CORE_READ((struct bpf_prog *)ent, aux, id);
+ case BPF_OBJ_MAP:
+ return BPF_CORE_READ((struct bpf_map *)ent, id);
+ case BPF_OBJ_BTF:
+ return BPF_CORE_READ((struct btf *)ent, id);
+ case BPF_OBJ_LINK:
+ return BPF_CORE_READ((struct bpf_link *)ent, id);
+ default:
+ return 0;
+ }
+}
+
+SEC("iter/task_file")
+int iter(struct bpf_iter__task_file *ctx)
+{
+ struct file *file = ctx->file;
+ struct task_struct *task = ctx->task;
+ struct pid_iter_entry e;
+ const void *fops;
+
+ if (!file || !task)
+ return 0;
+
+ switch (obj_type) {
+ case BPF_OBJ_PROG:
+ fops = &bpf_prog_fops;
+ break;
+ case BPF_OBJ_MAP:
+ fops = &bpf_map_fops;
+ break;
+ case BPF_OBJ_BTF:
+ fops = &btf_fops;
+ break;
+ case BPF_OBJ_LINK:
+ fops = &bpf_link_fops;
+ break;
+ default:
+ return 0;
+ }
+
+ if (file->f_op != fops)
+ return 0;
+
+ e.pid = task->tgid;
+ e.id = get_obj_id(file->private_data, obj_type);
+ bpf_probe_read(&e.comm, sizeof(e.comm), task->group_leader->comm);
+ bpf_seq_write(ctx->meta->seq, &e, sizeof(e));
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/pid_iter.h b/tools/bpf/bpftool/skeleton/pid_iter.h
new file mode 100644
index 000000000000..5692cf257adb
--- /dev/null
+++ b/tools/bpf/bpftool/skeleton/pid_iter.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2020 Facebook */
+#ifndef __PID_ITER_H
+#define __PID_ITER_H
+
+struct pid_iter_entry {
+ __u32 id;
+ int pid;
+ char comm[16];
+};
+
+#endif
diff --git a/tools/bpf/bpftool/skeleton/profiler.bpf.c b/tools/bpf/bpftool/skeleton/profiler.bpf.c
index 20034c12f7c5..4e3512f700c0 100644
--- a/tools/bpf/bpftool/skeleton/profiler.bpf.c
+++ b/tools/bpf/bpftool/skeleton/profiler.bpf.c
@@ -1,7 +1,6 @@
-// SPDX-License-Identifier: GPL-2.0
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
// Copyright (c) 2020 Facebook
-#include "profiler.h"
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -116,4 +115,4 @@ int BPF_PROG(fexit_XXX)
return 0;
}
-char LICENSE[] SEC("license") = "GPL";
+char LICENSE[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/bpf/bpftool/skeleton/profiler.h b/tools/bpf/bpftool/skeleton/profiler.h
deleted file mode 100644
index 1f767e9510f7..000000000000
--- a/tools/bpf/bpftool/skeleton/profiler.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-#ifndef __PROFILER_H
-#define __PROFILER_H
-
-/* useful typedefs from vmlinux.h */
-
-typedef signed char __s8;
-typedef unsigned char __u8;
-typedef short int __s16;
-typedef short unsigned int __u16;
-typedef int __s32;
-typedef unsigned int __u32;
-typedef long long int __s64;
-typedef long long unsigned int __u64;
-
-typedef __s8 s8;
-typedef __u8 u8;
-typedef __s16 s16;
-typedef __u16 u16;
-typedef __s32 s32;
-typedef __u32 u32;
-typedef __s64 s64;
-typedef __u64 u64;
-
-enum {
- false = 0,
- true = 1,
-};
-
-#ifdef __CHECKER__
-#define __bitwise__ __attribute__((bitwise))
-#else
-#define __bitwise__
-#endif
-
-typedef __u16 __bitwise__ __le16;
-typedef __u16 __bitwise__ __be16;
-typedef __u32 __bitwise__ __le32;
-typedef __u32 __bitwise__ __be32;
-typedef __u64 __bitwise__ __le64;
-typedef __u64 __bitwise__ __be64;
-
-typedef __u16 __bitwise__ __sum16;
-typedef __u32 __bitwise__ __wsum;
-
-#endif /* __PROFILER_H */
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index b1f0321180f5..88371f7f0369 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -68,7 +68,7 @@ FILES= \
test-llvm-version.bin \
test-libaio.bin \
test-libzstd.bin \
- test-clang-bpf-global-var.bin \
+ test-clang-bpf-co-re.bin \
test-file-handle.bin \
test-libpfm4.bin
@@ -325,7 +325,7 @@ $(OUTPUT)test-libaio.bin:
$(OUTPUT)test-libzstd.bin:
$(BUILD) -lzstd
-$(OUTPUT)test-clang-bpf-global-var.bin:
+$(OUTPUT)test-clang-bpf-co-re.bin:
$(CLANG) -S -g -target bpf -o - $(patsubst %.bin,%.c,$(@F)) | \
grep BTF_KIND_VAR
diff --git a/tools/build/feature/test-clang-bpf-co-re.c b/tools/build/feature/test-clang-bpf-co-re.c
new file mode 100644
index 000000000000..cb5265bfdd83
--- /dev/null
+++ b/tools/build/feature/test-clang-bpf-co-re.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+struct test {
+ int a;
+ int b;
+} __attribute__((preserve_access_index));
+
+volatile struct test global_value_for_test = {};
diff --git a/tools/build/feature/test-clang-bpf-global-var.c b/tools/build/feature/test-clang-bpf-global-var.c
deleted file mode 100644
index 221f1481d52e..000000000000
--- a/tools/build/feature/test-clang-bpf-global-var.c
+++ /dev/null
@@ -1,4 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2020 Facebook
-
-volatile int global_value_for_test = 1;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 8bd33050b7bb..ea382e47e400 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -653,7 +653,7 @@ union bpf_attr {
* Map value associated to *key*, or **NULL** if no entry was
* found.
*
- * int bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
+ * long bpf_map_update_elem(struct bpf_map *map, const void *key, const void *value, u64 flags)
* Description
* Add or update the value of the entry associated to *key* in
* *map* with *value*. *flags* is one of:
@@ -671,13 +671,13 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_delete_elem(struct bpf_map *map, const void *key)
+ * long bpf_map_delete_elem(struct bpf_map *map, const void *key)
* Description
* Delete entry with *key* from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read(void *dst, u32 size, const void *unsafe_ptr)
* Description
* For tracing programs, safely attempt to read *size* bytes from
* kernel space address *unsafe_ptr* and store the data in *dst*.
@@ -695,7 +695,7 @@ union bpf_attr {
* Return
* Current *ktime*.
*
- * int bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
+ * long bpf_trace_printk(const char *fmt, u32 fmt_size, ...)
* Description
* This helper is a "printk()-like" facility for debugging. It
* prints a message defined by format *fmt* (of size *fmt_size*)
@@ -775,7 +775,7 @@ union bpf_attr {
* Return
* The SMP id of the processor running the program.
*
- * int bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
+ * long bpf_skb_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len, u64 flags)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. *flags* are a combination of
@@ -792,7 +792,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
+ * long bpf_l3_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 size)
* Description
* Recompute the layer 3 (e.g. IP) checksum for the packet
* associated to *skb*. Computation is incremental, so the helper
@@ -817,7 +817,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
+ * long bpf_l4_csum_replace(struct sk_buff *skb, u32 offset, u64 from, u64 to, u64 flags)
* Description
* Recompute the layer 4 (e.g. TCP, UDP or ICMP) checksum for the
* packet associated to *skb*. Computation is incremental, so the
@@ -849,7 +849,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
+ * long bpf_tail_call(void *ctx, struct bpf_map *prog_array_map, u32 index)
* Description
* This special helper is used to trigger a "tail call", or in
* other words, to jump into another eBPF program. The same stack
@@ -880,7 +880,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
+ * long bpf_clone_redirect(struct sk_buff *skb, u32 ifindex, u64 flags)
* Description
* Clone and redirect the packet associated to *skb* to another
* net device of index *ifindex*. Both ingress and egress
@@ -916,7 +916,7 @@ union bpf_attr {
* A 64-bit integer containing the current GID and UID, and
* created as such: *current_gid* **<< 32 \|** *current_uid*.
*
- * int bpf_get_current_comm(void *buf, u32 size_of_buf)
+ * long bpf_get_current_comm(void *buf, u32 size_of_buf)
* Description
* Copy the **comm** attribute of the current task into *buf* of
* *size_of_buf*. The **comm** attribute contains the name of
@@ -953,7 +953,7 @@ union bpf_attr {
* Return
* The classid, or 0 for the default unconfigured classid.
*
- * int bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
+ * long bpf_skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci)
* Description
* Push a *vlan_tci* (VLAN tag control information) of protocol
* *vlan_proto* to the packet associated to *skb*, then update
@@ -969,7 +969,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_vlan_pop(struct sk_buff *skb)
+ * long bpf_skb_vlan_pop(struct sk_buff *skb)
* Description
* Pop a VLAN header from the packet associated to *skb*.
*
@@ -981,7 +981,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_get_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
* Description
* Get tunnel metadata. This helper takes a pointer *key* to an
* empty **struct bpf_tunnel_key** of **size**, that will be
@@ -1032,7 +1032,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
+ * long bpf_skb_set_tunnel_key(struct sk_buff *skb, struct bpf_tunnel_key *key, u32 size, u64 flags)
* Description
* Populate tunnel metadata for packet associated to *skb.* The
* tunnel metadata is set to the contents of *key*, of *size*. The
@@ -1098,7 +1098,7 @@ union bpf_attr {
* The value of the perf event counter read from the map, or a
* negative error code in case of failure.
*
- * int bpf_redirect(u32 ifindex, u64 flags)
+ * long bpf_redirect(u32 ifindex, u64 flags)
* Description
* Redirect the packet to another net device of index *ifindex*.
* This helper is somewhat similar to **bpf_clone_redirect**\
@@ -1145,7 +1145,7 @@ union bpf_attr {
* The realm of the route for the packet associated to *skb*, or 0
* if none was found.
*
- * int bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_perf_event_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -1190,7 +1190,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
+ * long bpf_skb_load_bytes(const void *skb, u32 offset, void *to, u32 len)
* Description
* This helper was provided as an easy way to load data from a
* packet. It can be used to load *len* bytes from *offset* from
@@ -1207,7 +1207,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
+ * long bpf_get_stackid(void *ctx, struct bpf_map *map, u64 flags)
* Description
* Walk a user or a kernel stack and return its id. To achieve
* this, the helper needs *ctx*, which is a pointer to the context
@@ -1276,7 +1276,7 @@ union bpf_attr {
* The checksum result, or a negative error code in case of
* failure.
*
- * int bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_get_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Retrieve tunnel options metadata for the packet associated to
* *skb*, and store the raw tunnel option data to the buffer *opt*
@@ -1294,7 +1294,7 @@ union bpf_attr {
* Return
* The size of the option data retrieved.
*
- * int bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
+ * long bpf_skb_set_tunnel_opt(struct sk_buff *skb, void *opt, u32 size)
* Description
* Set tunnel options metadata for the packet associated to *skb*
* to the option data contained in the raw buffer *opt* of *size*.
@@ -1304,7 +1304,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
+ * long bpf_skb_change_proto(struct sk_buff *skb, __be16 proto, u64 flags)
* Description
* Change the protocol of the *skb* to *proto*. Currently
* supported are transition from IPv4 to IPv6, and from IPv6 to
@@ -1331,7 +1331,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_change_type(struct sk_buff *skb, u32 type)
+ * long bpf_skb_change_type(struct sk_buff *skb, u32 type)
* Description
* Change the packet type for the packet associated to *skb*. This
* comes down to setting *skb*\ **->pkt_type** to *type*, except
@@ -1358,7 +1358,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
+ * long bpf_skb_under_cgroup(struct sk_buff *skb, struct bpf_map *map, u32 index)
* Description
* Check whether *skb* is a descendant of the cgroup2 held by
* *map* of type **BPF_MAP_TYPE_CGROUP_ARRAY**, at *index*.
@@ -1389,7 +1389,7 @@ union bpf_attr {
* Return
* A pointer to the current task struct.
*
- * int bpf_probe_write_user(void *dst, const void *src, u32 len)
+ * long bpf_probe_write_user(void *dst, const void *src, u32 len)
* Description
* Attempt in a safe way to write *len* bytes from the buffer
* *src* to *dst* in memory. It only works for threads that are in
@@ -1408,7 +1408,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
+ * long bpf_current_task_under_cgroup(struct bpf_map *map, u32 index)
* Description
* Check whether the probe is being run is the context of a given
* subset of the cgroup2 hierarchy. The cgroup2 to test is held by
@@ -1420,7 +1420,7 @@ union bpf_attr {
* * 1, if the *skb* task does not belong to the cgroup2.
* * A negative error code, if an error occurred.
*
- * int bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_tail(struct sk_buff *skb, u32 len, u64 flags)
* Description
* Resize (trim or grow) the packet associated to *skb* to the
* new *len*. The *flags* are reserved for future usage, and must
@@ -1444,7 +1444,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_pull_data(struct sk_buff *skb, u32 len)
+ * long bpf_skb_pull_data(struct sk_buff *skb, u32 len)
* Description
* Pull in non-linear data in case the *skb* is non-linear and not
* all of *len* are part of the linear section. Make *len* bytes
@@ -1500,7 +1500,7 @@ union bpf_attr {
* recalculation the next time the kernel tries to access this
* hash or when the **bpf_get_hash_recalc**\ () helper is called.
*
- * int bpf_get_numa_node_id(void)
+ * long bpf_get_numa_node_id(void)
* Description
* Return the id of the current NUMA node. The primary use case
* for this helper is the selection of sockets for the local NUMA
@@ -1511,7 +1511,7 @@ union bpf_attr {
* Return
* The id of current NUMA node.
*
- * int bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
+ * long bpf_skb_change_head(struct sk_buff *skb, u32 len, u64 flags)
* Description
* Grows headroom of packet associated to *skb* and adjusts the
* offset of the MAC header accordingly, adding *len* bytes of
@@ -1532,7 +1532,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_head(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data** by *delta* bytes. Note that
* it is possible to use a negative value for *delta*. This helper
@@ -1547,7 +1547,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address
* *unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
@@ -1595,14 +1595,14 @@ union bpf_attr {
* is returned (note that **overflowuid** might also be the actual
* UID value for the socket).
*
- * u32 bpf_set_hash(struct sk_buff *skb, u32 hash)
+ * long bpf_set_hash(struct sk_buff *skb, u32 hash)
* Description
* Set the full hash for *skb* (set the field *skb*\ **->hash**)
* to value *hash*.
* Return
* 0
*
- * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **setsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1621,16 +1621,19 @@ union bpf_attr {
*
* * **SOL_SOCKET**, which supports the following *optname*\ s:
* **SO_RCVBUF**, **SO_SNDBUF**, **SO_MAX_PACING_RATE**,
- * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**.
+ * **SO_PRIORITY**, **SO_RCVLOWAT**, **SO_MARK**,
+ * **SO_BINDTODEVICE**, **SO_KEEPALIVE**.
* * **IPPROTO_TCP**, which supports the following *optname*\ s:
* **TCP_CONGESTION**, **TCP_BPF_IW**,
- * **TCP_BPF_SNDCWND_CLAMP**.
+ * **TCP_BPF_SNDCWND_CLAMP**, **TCP_SAVE_SYN**,
+ * **TCP_KEEPIDLE**, **TCP_KEEPINTVL**, **TCP_KEEPCNT**,
+ * **TCP_SYNCNT**, **TCP_USER_TIMEOUT**.
* * **IPPROTO_IP**, which supports *optname* **IP_TOS**.
* * **IPPROTO_IPV6**, which supports *optname* **IPV6_TCLASS**.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
+ * long bpf_skb_adjust_room(struct sk_buff *skb, s32 len_diff, u32 mode, u64 flags)
* Description
* Grow or shrink the room for data in the packet associated to
* *skb* by *len_diff*, and according to the selected *mode*.
@@ -1676,7 +1679,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_redirect_map(struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the endpoint referenced by *map* at
* index *key*. Depending on its type, this *map* can contain
@@ -1697,7 +1700,7 @@ union bpf_attr {
* **XDP_REDIRECT** on success, or the value of the two lower bits
* of the *flags* argument on error.
*
- * int bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_sk_redirect_map(struct sk_buff *skb, struct bpf_map *map, u32 key, u64 flags)
* Description
* Redirect the packet to the socket referenced by *map* (of type
* **BPF_MAP_TYPE_SOCKMAP**) at index *key*. Both ingress and
@@ -1708,7 +1711,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_map_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a *map* referencing sockets. The
* *skops* is used as a new value for the entry associated to
@@ -1727,7 +1730,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_meta(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust the address pointed by *xdp_md*\ **->data_meta** by
* *delta* (which can be positive or negative). Note that this
@@ -1756,7 +1759,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_event_read_value(struct bpf_map *map, u64 flags, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* Read the value of a perf event counter, and store it into *buf*
* of size *buf_size*. This helper relies on a *map* of type
@@ -1806,7 +1809,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
+ * long bpf_perf_prog_read_value(struct bpf_perf_event_data *ctx, struct bpf_perf_event_value *buf, u32 buf_size)
* Description
* For en eBPF program attached to a perf event, retrieve the
* value of the event counter associated to *ctx* and store it in
@@ -1817,7 +1820,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
+ * long bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
* Description
* Emulate a call to **getsockopt()** on the socket associated to
* *bpf_socket*, which must be a full socket. The *level* at
@@ -1842,7 +1845,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_override_return(struct pt_regs *regs, u64 rc)
+ * long bpf_override_return(struct pt_regs *regs, u64 rc)
* Description
* Used for error injection, this helper uses kprobes to override
* the return value of the probed function, and to set it to *rc*.
@@ -1867,7 +1870,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
+ * long bpf_sock_ops_cb_flags_set(struct bpf_sock_ops *bpf_sock, int argval)
* Description
* Attempt to set the value of the **bpf_sock_ops_cb_flags** field
* for the full TCP socket associated to *bpf_sock_ops* to
@@ -1911,7 +1914,7 @@ union bpf_attr {
* be set is returned (which comes down to 0 if all bits were set
* as required).
*
- * int bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
+ * long bpf_msg_redirect_map(struct sk_msg_buff *msg, struct bpf_map *map, u32 key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
@@ -1925,7 +1928,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_apply_bytes(struct sk_msg_buff *msg, u32 bytes)
* Description
* For socket policies, apply the verdict of the eBPF program to
* the next *bytes* (number of bytes) of message *msg*.
@@ -1959,7 +1962,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
+ * long bpf_msg_cork_bytes(struct sk_msg_buff *msg, u32 bytes)
* Description
* For socket policies, prevent the execution of the verdict eBPF
* program for message *msg* until *bytes* (byte number) have been
@@ -1977,7 +1980,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
+ * long bpf_msg_pull_data(struct sk_msg_buff *msg, u32 start, u32 end, u64 flags)
* Description
* For socket policies, pull in non-linear data from user space
* for *msg* and set pointers *msg*\ **->data** and *msg*\
@@ -2008,7 +2011,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
+ * long bpf_bind(struct bpf_sock_addr *ctx, struct sockaddr *addr, int addr_len)
* Description
* Bind the socket associated to *ctx* to the address pointed by
* *addr*, of length *addr_len*. This allows for making outgoing
@@ -2026,7 +2029,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
+ * long bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
* Description
* Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
* possible to both shrink and grow the packet tail.
@@ -2040,7 +2043,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
+ * long bpf_skb_get_xfrm_state(struct sk_buff *skb, u32 index, struct bpf_xfrm_state *xfrm_state, u32 size, u64 flags)
* Description
* Retrieve the XFRM state (IP transform framework, see also
* **ip-xfrm(8)**) at *index* in XFRM "security path" for *skb*.
@@ -2056,7 +2059,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_get_stack(void *ctx, void *buf, u32 size, u64 flags)
* Description
* Return a user or a kernel stack in bpf program provided buffer.
* To achieve this, the helper needs *ctx*, which is a pointer
@@ -2089,7 +2092,7 @@ union bpf_attr {
* A non-negative value equal to or less than *size* on success,
* or a negative error in case of failure.
*
- * int bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
+ * long bpf_skb_load_bytes_relative(const void *skb, u32 offset, void *to, u32 len, u32 start_header)
* Description
* This helper is similar to **bpf_skb_load_bytes**\ () in that
* it provides an easy way to load *len* bytes from *offset*
@@ -2111,7 +2114,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
+ * long bpf_fib_lookup(void *ctx, struct bpf_fib_lookup *params, int plen, u32 flags)
* Description
* Do FIB lookup in kernel tables using parameters in *params*.
* If lookup is successful and result shows packet is to be
@@ -2142,7 +2145,7 @@ union bpf_attr {
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* packet is not forwarded or needs assist from full stack
*
- * int bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
* The *skops* is used as a new value for the entry associated to
@@ -2161,7 +2164,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_msg_redirect_hash(struct sk_msg_buff *msg, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* socket level. If the message *msg* is allowed to pass (i.e. if
@@ -2175,7 +2178,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_redirect_hash(struct sk_buff *skb, struct bpf_map *map, void *key, u64 flags)
* Description
* This helper is used in programs implementing policies at the
* skb socket level. If the sk_buff *skb* is allowed to pass (i.e.
@@ -2189,7 +2192,7 @@ union bpf_attr {
* Return
* **SK_PASS** on success, or **SK_DROP** on error.
*
- * int bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
+ * long bpf_lwt_push_encap(struct sk_buff *skb, u32 type, void *hdr, u32 len)
* Description
* Encapsulate the packet associated to *skb* within a Layer 3
* protocol header. This header is provided in the buffer at
@@ -2226,7 +2229,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
+ * long bpf_lwt_seg6_store_bytes(struct sk_buff *skb, u32 offset, const void *from, u32 len)
* Description
* Store *len* bytes from address *from* into the packet
* associated to *skb*, at *offset*. Only the flags, tag and TLVs
@@ -2241,7 +2244,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
+ * long bpf_lwt_seg6_adjust_srh(struct sk_buff *skb, u32 offset, s32 delta)
* Description
* Adjust the size allocated to TLVs in the outermost IPv6
* Segment Routing Header contained in the packet associated to
@@ -2257,7 +2260,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
+ * long bpf_lwt_seg6_action(struct sk_buff *skb, u32 action, void *param, u32 param_len)
* Description
* Apply an IPv6 Segment Routing action of type *action* to the
* packet associated to *skb*. Each action takes a parameter
@@ -2286,7 +2289,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_repeat(void *ctx)
+ * long bpf_rc_repeat(void *ctx)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded repeat key message. This delays
@@ -2305,7 +2308,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * long bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded key press with *scancode*,
@@ -2370,7 +2373,7 @@ union bpf_attr {
* Return
* A pointer to the local storage area.
*
- * int bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
+ * long bpf_sk_select_reuseport(struct sk_reuseport_md *reuse, struct bpf_map *map, void *key, u64 flags)
* Description
* Select a **SO_REUSEPORT** socket from a
* **BPF_MAP_TYPE_REUSEPORT_ARRAY** *map*.
@@ -2471,7 +2474,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * int bpf_sk_release(struct bpf_sock *sock)
+ * long bpf_sk_release(struct bpf_sock *sock)
* Description
* Release the reference held by *sock*. *sock* must be a
* non-**NULL** pointer that was returned from
@@ -2479,7 +2482,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * long bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
* Description
* Push an element *value* in *map*. *flags* is one of:
*
@@ -2489,19 +2492,19 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_pop_elem(struct bpf_map *map, void *value)
+ * long bpf_map_pop_elem(struct bpf_map *map, void *value)
* Description
* Pop an element from *map*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_map_peek_elem(struct bpf_map *map, void *value)
+ * long bpf_map_peek_elem(struct bpf_map *map, void *value)
* Description
* Get an element from *map* without removing it.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_push_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* For socket policies, insert *len* bytes into *msg* at offset
* *start*.
@@ -2517,7 +2520,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
+ * long bpf_msg_pop_data(struct sk_msg_buff *msg, u32 start, u32 len, u64 flags)
* Description
* Will remove *len* bytes from a *msg* starting at byte *start*.
* This may result in **ENOMEM** errors under certain situations if
@@ -2529,7 +2532,7 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
+ * long bpf_rc_pointer_rel(void *ctx, s32 rel_x, s32 rel_y)
* Description
* This helper is used in programs implementing IR decoding, to
* report a successfully decoded pointer movement.
@@ -2543,7 +2546,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ * long bpf_spin_lock(struct bpf_spin_lock *lock)
* Description
* Acquire a spinlock represented by the pointer *lock*, which is
* stored as part of a value of a map. Taking the lock allows to
@@ -2591,7 +2594,7 @@ union bpf_attr {
* Return
* 0
*
- * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ * long bpf_spin_unlock(struct bpf_spin_lock *lock)
* Description
* Release the *lock* previously locked by a call to
* **bpf_spin_lock**\ (\ *lock*\ ).
@@ -2614,7 +2617,7 @@ union bpf_attr {
* A **struct bpf_tcp_sock** pointer on success, or **NULL** in
* case of failure.
*
- * int bpf_skb_ecn_set_ce(struct sk_buff *skb)
+ * long bpf_skb_ecn_set_ce(struct sk_buff *skb)
* Description
* Set ECN (Explicit Congestion Notification) field of IP header
* to **CE** (Congestion Encountered) if current value is **ECT**
@@ -2651,7 +2654,7 @@ union bpf_attr {
* result is from *reuse*\ **->socks**\ [] using the hash of the
* tuple.
*
- * int bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
+ * long bpf_tcp_check_syncookie(struct bpf_sock *sk, void *iph, u32 iph_len, struct tcphdr *th, u32 th_len)
* Description
* Check whether *iph* and *th* contain a valid SYN cookie ACK for
* the listening socket in *sk*.
@@ -2666,7 +2669,7 @@ union bpf_attr {
* 0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
* error otherwise.
*
- * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
+ * long bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags)
* Description
* Get name of sysctl in /proc/sys/ and copy it into provided by
* program buffer *buf* of size *buf_len*.
@@ -2682,7 +2685,7 @@ union bpf_attr {
* **-E2BIG** if the buffer wasn't big enough (*buf* will contain
* truncated name in this case).
*
- * int bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_current_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get current value of sysctl as it is presented in /proc/sys
* (incl. newline, etc), and copy it as a string into provided
@@ -2701,7 +2704,7 @@ union bpf_attr {
* **-EINVAL** if current value was unavailable, e.g. because
* sysctl is uninitialized and read returns -EIO for it.
*
- * int bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
+ * long bpf_sysctl_get_new_value(struct bpf_sysctl *ctx, char *buf, size_t buf_len)
* Description
* Get new value being written by user space to sysctl (before
* the actual write happens) and copy it as a string into
@@ -2718,7 +2721,7 @@ union bpf_attr {
*
* **-EINVAL** if sysctl is being read.
*
- * int bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
+ * long bpf_sysctl_set_new_value(struct bpf_sysctl *ctx, const char *buf, size_t buf_len)
* Description
* Override new value being written by user space to sysctl with
* value provided by program in buffer *buf* of size *buf_len*.
@@ -2735,7 +2738,7 @@ union bpf_attr {
*
* **-EINVAL** if sysctl is being read.
*
- * int bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
+ * long bpf_strtol(const char *buf, size_t buf_len, u64 flags, long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to a long integer according to the given base
@@ -2759,7 +2762,7 @@ union bpf_attr {
*
* **-ERANGE** if resulting value was out of range.
*
- * int bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
+ * long bpf_strtoul(const char *buf, size_t buf_len, u64 flags, unsigned long *res)
* Description
* Convert the initial part of the string from buffer *buf* of
* size *buf_len* to an unsigned long integer according to the
@@ -2810,7 +2813,7 @@ union bpf_attr {
* **NULL** if not found or there was an error in adding
* a new bpf-local-storage.
*
- * int bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
+ * long bpf_sk_storage_delete(struct bpf_map *map, struct bpf_sock *sk)
* Description
* Delete a bpf-local-storage from a *sk*.
* Return
@@ -2818,7 +2821,7 @@ union bpf_attr {
*
* **-ENOENT** if the bpf-local-storage cannot be found.
*
- * int bpf_send_signal(u32 sig)
+ * long bpf_send_signal(u32 sig)
* Description
* Send signal *sig* to the process of the current task.
* The signal may be delivered to any of this process's threads.
@@ -2859,7 +2862,7 @@ union bpf_attr {
*
* **-EPROTONOSUPPORT** IP packet version is not 4 or 6
*
- * int bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_skb_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -2883,21 +2886,21 @@ union bpf_attr {
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from user space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Safely attempt to read *size* bytes from kernel space address
* *unsafe_ptr* and store the data in *dst*.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_user_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe user address
* *unsafe_ptr* to *dst*. The *size* should include the
@@ -2941,7 +2944,7 @@ union bpf_attr {
* including the trailing NUL character. On error, a negative
* value.
*
- * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
+ * long bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
* Description
* Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
* to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
@@ -2949,14 +2952,14 @@ union bpf_attr {
* On success, the strictly positive length of the string, including
* the trailing NUL character. On error, a negative value.
*
- * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
+ * long bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
* Description
* Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
* *rcv_nxt* is the ack_seq to be sent out.
* Return
* 0 on success, or a negative error in case of failure.
*
- * int bpf_send_signal_thread(u32 sig)
+ * long bpf_send_signal_thread(u32 sig)
* Description
* Send signal *sig* to the thread corresponding to the current task.
* Return
@@ -2976,7 +2979,7 @@ union bpf_attr {
* Return
* The 64 bit jiffies
*
- * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
+ * long bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
* Description
* For an eBPF program attached to a perf event, retrieve the
* branch records (**struct perf_branch_entry**) associated to *ctx*
@@ -2995,7 +2998,7 @@ union bpf_attr {
*
* **-ENOENT** if architecture does not support branch records.
*
- * int bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
+ * long bpf_get_ns_current_pid_tgid(u64 dev, u64 ino, struct bpf_pidns_info *nsdata, u32 size)
* Description
* Returns 0 on success, values for *pid* and *tgid* as seen from the current
* *namespace* will be returned in *nsdata*.
@@ -3007,7 +3010,7 @@ union bpf_attr {
*
* **-ENOENT** if pidns does not exists for the current task.
*
- * int bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
+ * long bpf_xdp_output(void *ctx, struct bpf_map *map, u64 flags, void *data, u64 size)
* Description
* Write raw *data* blob into a special BPF perf event held by
* *map* of type **BPF_MAP_TYPE_PERF_EVENT_ARRAY**. This perf
@@ -3062,7 +3065,7 @@ union bpf_attr {
* Return
* The id is returned or 0 in case the id could not be retrieved.
*
- * int bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
+ * long bpf_sk_assign(struct sk_buff *skb, struct bpf_sock *sk, u64 flags)
* Description
* Assign the *sk* to the *skb*. When combined with appropriate
* routing configuration to receive the packet towards the socket,
@@ -3097,7 +3100,7 @@ union bpf_attr {
* Return
* Current *ktime*.
*
- * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * long bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
* Description
* **bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
* out the format string.
@@ -3126,7 +3129,7 @@ union bpf_attr {
*
* **-EOVERFLOW** if an overflow happened: The same object will be tried again.
*
- * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * long bpf_seq_write(struct seq_file *m, const void *data, u32 len)
* Description
* **bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
* The *m* represents the seq_file. The *data* and *len* represent the
@@ -3222,7 +3225,7 @@ union bpf_attr {
* Return
* Requested value, or 0, if *flags* are not recognized.
*
- * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * long bpf_csum_level(struct sk_buff *skb, u64 level)
* Description
* Change the skbs checksum level by one layer up or down, or
* reset it entirely to none in order to have the stack perform
@@ -3253,6 +3256,69 @@ union bpf_attr {
* case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
* is returned or the error code -EACCES in case the skb is not
* subject to CHECKSUM_UNNECESSARY.
+ *
+ * struct tcp6_sock *bpf_skc_to_tcp6_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp6_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_sock *bpf_skc_to_tcp_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_timewait_sock *bpf_skc_to_tcp_timewait_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_timewait_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct tcp_request_sock *bpf_skc_to_tcp_request_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *tcp_request_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * struct udp6_sock *bpf_skc_to_udp6_sock(void *sk)
+ * Description
+ * Dynamically cast a *sk* pointer to a *udp6_sock* pointer.
+ * Return
+ * *sk* if casting is valid, or NULL otherwise.
+ *
+ * long bpf_get_task_stack(struct task_struct *task, void *buf, u32 size, u64 flags)
+ * Description
+ * Return a user or a kernel stack in bpf program provided buffer.
+ * To achieve this, the helper needs *task*, which is a valid
+ * pointer to struct task_struct. To store the stacktrace, the
+ * bpf program provides *buf* with a nonnegative *size*.
+ *
+ * The last argument, *flags*, holds the number of stack frames to
+ * skip (from 0 to 255), masked with
+ * **BPF_F_SKIP_FIELD_MASK**. The next bits can be used to set
+ * the following flags:
+ *
+ * **BPF_F_USER_STACK**
+ * Collect a user space stack instead of a kernel stack.
+ * **BPF_F_USER_BUILD_ID**
+ * Collect buildid+offset instead of ips for user stack,
+ * only valid if **BPF_F_USER_STACK** is also specified.
+ *
+ * **bpf_get_task_stack**\ () can collect up to
+ * **PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
+ * to sufficient large buffer size. Note that
+ * this limit can be controlled with the **sysctl** program, and
+ * that it should be manually increased in order to profile long
+ * user stacks (such as stacks for Java programs). To do so, use:
+ *
+ * ::
+ *
+ * # sysctl kernel.perf_event_max_stack=<new value>
+ * Return
+ * A non-negative value equal to or less than *size* on success,
+ * or a negative error in case of failure.
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3390,7 +3456,14 @@ union bpf_attr {
FN(ringbuf_submit), \
FN(ringbuf_discard), \
FN(ringbuf_query), \
- FN(csum_level),
+ FN(csum_level), \
+ FN(skc_to_tcp6_sock), \
+ FN(skc_to_tcp_sock), \
+ FN(skc_to_tcp_timewait_sock), \
+ FN(skc_to_tcp_request_sock), \
+ FN(skc_to_udp6_sock), \
+ FN(get_task_stack), \
+ /* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 7009dc90e012..eae5cccff761 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -217,7 +217,7 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ_INTO(dst, src, a, ...) \
({ \
- ___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__) \
+ ___core_read(bpf_core_read, dst, (src), a, ##__VA_ARGS__) \
})
/*
@@ -227,7 +227,7 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ_STR_INTO(dst, src, a, ...) \
({ \
- ___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \
+ ___core_read(bpf_core_read_str, dst, (src), a, ##__VA_ARGS__)\
})
/*
@@ -254,8 +254,8 @@ enum bpf_field_info_kind {
*/
#define BPF_CORE_READ(src, a, ...) \
({ \
- ___type(src, a, ##__VA_ARGS__) __r; \
- BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__); \
+ ___type((src), a, ##__VA_ARGS__) __r; \
+ BPF_CORE_READ_INTO(&__r, (src), a, ##__VA_ARGS__); \
__r; \
})
diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h
index fbe28008450f..ec9db4feca9f 100644
--- a/tools/lib/bpf/bpf_endian.h
+++ b/tools/lib/bpf/bpf_endian.h
@@ -2,8 +2,35 @@
#ifndef __BPF_ENDIAN__
#define __BPF_ENDIAN__
-#include <linux/stddef.h>
-#include <linux/swab.h>
+/*
+ * Isolate byte #n and put it into byte #m, for __u##b type.
+ * E.g., moving byte #6 (nnnnnnnn) into byte #1 (mmmmmmmm) for __u64:
+ * 1) xxxxxxxx nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx
+ * 2) nnnnnnnn xxxxxxxx xxxxxxxx xxxxxxxx xxxxxxxx mmmmmmmm xxxxxxxx 00000000
+ * 3) 00000000 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn
+ * 4) 00000000 00000000 00000000 00000000 00000000 00000000 nnnnnnnn 00000000
+ */
+#define ___bpf_mvb(x, b, n, m) ((__u##b)(x) << (b-(n+1)*8) >> (b-8) << (m*8))
+
+#define ___bpf_swab16(x) ((__u16)( \
+ ___bpf_mvb(x, 16, 0, 1) | \
+ ___bpf_mvb(x, 16, 1, 0)))
+
+#define ___bpf_swab32(x) ((__u32)( \
+ ___bpf_mvb(x, 32, 0, 3) | \
+ ___bpf_mvb(x, 32, 1, 2) | \
+ ___bpf_mvb(x, 32, 2, 1) | \
+ ___bpf_mvb(x, 32, 3, 0)))
+
+#define ___bpf_swab64(x) ((__u64)( \
+ ___bpf_mvb(x, 64, 0, 7) | \
+ ___bpf_mvb(x, 64, 1, 6) | \
+ ___bpf_mvb(x, 64, 2, 5) | \
+ ___bpf_mvb(x, 64, 3, 4) | \
+ ___bpf_mvb(x, 64, 4, 3) | \
+ ___bpf_mvb(x, 64, 5, 2) | \
+ ___bpf_mvb(x, 64, 6, 1) | \
+ ___bpf_mvb(x, 64, 7, 0)))
/* LLVM's BPF target selects the endianness of the CPU
* it compiles on, or the user specifies (bpfel/bpfeb),
@@ -23,16 +50,16 @@
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
# define __bpf_ntohs(x) __builtin_bswap16(x)
# define __bpf_htons(x) __builtin_bswap16(x)
-# define __bpf_constant_ntohs(x) ___constant_swab16(x)
-# define __bpf_constant_htons(x) ___constant_swab16(x)
+# define __bpf_constant_ntohs(x) ___bpf_swab16(x)
+# define __bpf_constant_htons(x) ___bpf_swab16(x)
# define __bpf_ntohl(x) __builtin_bswap32(x)
# define __bpf_htonl(x) __builtin_bswap32(x)
-# define __bpf_constant_ntohl(x) ___constant_swab32(x)
-# define __bpf_constant_htonl(x) ___constant_swab32(x)
+# define __bpf_constant_ntohl(x) ___bpf_swab32(x)
+# define __bpf_constant_htonl(x) ___bpf_swab32(x)
# define __bpf_be64_to_cpu(x) __builtin_bswap64(x)
# define __bpf_cpu_to_be64(x) __builtin_bswap64(x)
-# define __bpf_constant_be64_to_cpu(x) ___constant_swab64(x)
-# define __bpf_constant_cpu_to_be64(x) ___constant_swab64(x)
+# define __bpf_constant_be64_to_cpu(x) ___bpf_swab64(x)
+# define __bpf_constant_cpu_to_be64(x) ___bpf_swab64(x)
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
# define __bpf_ntohs(x) (x)
# define __bpf_htons(x) (x)
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f67dce2af802..a510d8ed716f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -75,5 +75,6 @@ enum libbpf_tristate {
};
#define __kconfig __attribute__((section(".kconfig")))
+#define __ksym __attribute__((section(".ksyms")))
#endif
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 70c1b7ec2bd0..06cd1731c154 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -168,6 +168,11 @@ static inline bool btf_kflag(const struct btf_type *t)
return BTF_INFO_KFLAG(t->info);
}
+static inline bool btf_is_void(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_UNKN;
+}
+
static inline bool btf_is_int(const struct btf_type *t)
{
return btf_kind(t) == BTF_KIND_INT;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 11e4725b8b1c..51fd9b6c6a77 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -230,6 +230,7 @@ struct bpf_program {
struct bpf_insn *insns;
size_t insns_cnt, main_prog_cnt;
enum bpf_prog_type type;
+ bool load;
struct reloc_desc *reloc_desc;
int nr_reloc;
@@ -285,6 +286,7 @@ struct bpf_struct_ops {
#define BSS_SEC ".bss"
#define RODATA_SEC ".rodata"
#define KCONFIG_SEC ".kconfig"
+#define KSYMS_SEC ".ksyms"
#define STRUCT_OPS_SEC ".struct_ops"
enum libbpf_map_type {
@@ -310,6 +312,7 @@ struct bpf_map {
int map_ifindex;
int inner_map_fd;
struct bpf_map_def def;
+ __u32 numa_node;
__u32 btf_var_idx;
__u32 btf_key_type_id;
__u32 btf_value_type_id;
@@ -329,24 +332,39 @@ struct bpf_map {
enum extern_type {
EXT_UNKNOWN,
- EXT_CHAR,
- EXT_BOOL,
- EXT_INT,
- EXT_TRISTATE,
- EXT_CHAR_ARR,
+ EXT_KCFG,
+ EXT_KSYM,
+};
+
+enum kcfg_type {
+ KCFG_UNKNOWN,
+ KCFG_CHAR,
+ KCFG_BOOL,
+ KCFG_INT,
+ KCFG_TRISTATE,
+ KCFG_CHAR_ARR,
};
struct extern_desc {
- const char *name;
+ enum extern_type type;
int sym_idx;
int btf_id;
- enum extern_type type;
- int sz;
- int align;
- int data_off;
- bool is_signed;
- bool is_weak;
+ int sec_btf_id;
+ const char *name;
bool is_set;
+ bool is_weak;
+ union {
+ struct {
+ enum kcfg_type type;
+ int sz;
+ int align;
+ int data_off;
+ bool is_signed;
+ } kcfg;
+ struct {
+ unsigned long long addr;
+ } ksym;
+ };
};
static LIST_HEAD(bpf_objects_list);
@@ -524,6 +542,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,
prog->instances.fds = NULL;
prog->instances.nr = -1;
prog->type = BPF_PROG_TYPE_UNSPEC;
+ prog->load = true;
return 0;
errout:
@@ -1423,19 +1442,19 @@ static struct extern_desc *find_extern_by_name(const struct bpf_object *obj,
return NULL;
}
-static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
- char value)
+static int set_kcfg_value_tri(struct extern_desc *ext, void *ext_val,
+ char value)
{
- switch (ext->type) {
- case EXT_BOOL:
+ switch (ext->kcfg.type) {
+ case KCFG_BOOL:
if (value == 'm') {
- pr_warn("extern %s=%c should be tristate or char\n",
+ pr_warn("extern (kcfg) %s=%c should be tristate or char\n",
ext->name, value);
return -EINVAL;
}
*(bool *)ext_val = value == 'y' ? true : false;
break;
- case EXT_TRISTATE:
+ case KCFG_TRISTATE:
if (value == 'y')
*(enum libbpf_tristate *)ext_val = TRI_YES;
else if (value == 'm')
@@ -1443,14 +1462,14 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
else /* value == 'n' */
*(enum libbpf_tristate *)ext_val = TRI_NO;
break;
- case EXT_CHAR:
+ case KCFG_CHAR:
*(char *)ext_val = value;
break;
- case EXT_UNKNOWN:
- case EXT_INT:
- case EXT_CHAR_ARR:
+ case KCFG_UNKNOWN:
+ case KCFG_INT:
+ case KCFG_CHAR_ARR:
default:
- pr_warn("extern %s=%c should be bool, tristate, or char\n",
+ pr_warn("extern (kcfg) %s=%c should be bool, tristate, or char\n",
ext->name, value);
return -EINVAL;
}
@@ -1458,29 +1477,29 @@ static int set_ext_value_tri(struct extern_desc *ext, void *ext_val,
return 0;
}
-static int set_ext_value_str(struct extern_desc *ext, char *ext_val,
- const char *value)
+static int set_kcfg_value_str(struct extern_desc *ext, char *ext_val,
+ const char *value)
{
size_t len;
- if (ext->type != EXT_CHAR_ARR) {
- pr_warn("extern %s=%s should char array\n", ext->name, value);
+ if (ext->kcfg.type != KCFG_CHAR_ARR) {
+ pr_warn("extern (kcfg) %s=%s should be char array\n", ext->name, value);
return -EINVAL;
}
len = strlen(value);
if (value[len - 1] != '"') {
- pr_warn("extern '%s': invalid string config '%s'\n",
+ pr_warn("extern (kcfg) '%s': invalid string config '%s'\n",
ext->name, value);
return -EINVAL;
}
/* strip quotes */
len -= 2;
- if (len >= ext->sz) {
- pr_warn("extern '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
- ext->name, value, len, ext->sz - 1);
- len = ext->sz - 1;
+ if (len >= ext->kcfg.sz) {
+ pr_warn("extern (kcfg) '%s': long string config %s of (%zu bytes) truncated to %d bytes\n",
+ ext->name, value, len, ext->kcfg.sz - 1);
+ len = ext->kcfg.sz - 1;
}
memcpy(ext_val, value + 1, len);
ext_val[len] = '\0';
@@ -1507,11 +1526,11 @@ static int parse_u64(const char *value, __u64 *res)
return 0;
}
-static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
+static bool is_kcfg_value_in_range(const struct extern_desc *ext, __u64 v)
{
- int bit_sz = ext->sz * 8;
+ int bit_sz = ext->kcfg.sz * 8;
- if (ext->sz == 8)
+ if (ext->kcfg.sz == 8)
return true;
/* Validate that value stored in u64 fits in integer of `ext->sz`
@@ -1526,26 +1545,26 @@ static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v)
* For unsigned target integer, check that all the (64 - Y) bits are
* zero.
*/
- if (ext->is_signed)
+ if (ext->kcfg.is_signed)
return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz);
else
return (v >> bit_sz) == 0;
}
-static int set_ext_value_num(struct extern_desc *ext, void *ext_val,
- __u64 value)
+static int set_kcfg_value_num(struct extern_desc *ext, void *ext_val,
+ __u64 value)
{
- if (ext->type != EXT_INT && ext->type != EXT_CHAR) {
- pr_warn("extern %s=%llu should be integer\n",
+ if (ext->kcfg.type != KCFG_INT && ext->kcfg.type != KCFG_CHAR) {
+ pr_warn("extern (kcfg) %s=%llu should be integer\n",
ext->name, (unsigned long long)value);
return -EINVAL;
}
- if (!is_ext_value_in_range(ext, value)) {
- pr_warn("extern %s=%llu value doesn't fit in %d bytes\n",
- ext->name, (unsigned long long)value, ext->sz);
+ if (!is_kcfg_value_in_range(ext, value)) {
+ pr_warn("extern (kcfg) %s=%llu value doesn't fit in %d bytes\n",
+ ext->name, (unsigned long long)value, ext->kcfg.sz);
return -ERANGE;
}
- switch (ext->sz) {
+ switch (ext->kcfg.sz) {
case 1: *(__u8 *)ext_val = value; break;
case 2: *(__u16 *)ext_val = value; break;
case 4: *(__u32 *)ext_val = value; break;
@@ -1591,30 +1610,30 @@ static int bpf_object__process_kconfig_line(struct bpf_object *obj,
if (!ext || ext->is_set)
return 0;
- ext_val = data + ext->data_off;
+ ext_val = data + ext->kcfg.data_off;
value = sep + 1;
switch (*value) {
case 'y': case 'n': case 'm':
- err = set_ext_value_tri(ext, ext_val, *value);
+ err = set_kcfg_value_tri(ext, ext_val, *value);
break;
case '"':
- err = set_ext_value_str(ext, ext_val, value);
+ err = set_kcfg_value_str(ext, ext_val, value);
break;
default:
/* assume integer */
err = parse_u64(value, &num);
if (err) {
- pr_warn("extern %s=%s should be integer\n",
+ pr_warn("extern (kcfg) %s=%s should be integer\n",
ext->name, value);
return err;
}
- err = set_ext_value_num(ext, ext_val, num);
+ err = set_kcfg_value_num(ext, ext_val, num);
break;
}
if (err)
return err;
- pr_debug("extern %s=%s\n", ext->name, value);
+ pr_debug("extern (kcfg) %s=%s\n", ext->name, value);
return 0;
}
@@ -1685,16 +1704,20 @@ static int bpf_object__read_kconfig_mem(struct bpf_object *obj,
static int bpf_object__init_kconfig_map(struct bpf_object *obj)
{
- struct extern_desc *last_ext;
+ struct extern_desc *last_ext = NULL, *ext;
size_t map_sz;
- int err;
+ int i, err;
- if (obj->nr_extern == 0)
- return 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type == EXT_KCFG)
+ last_ext = ext;
+ }
- last_ext = &obj->externs[obj->nr_extern - 1];
- map_sz = last_ext->data_off + last_ext->sz;
+ if (!last_ext)
+ return 0;
+ map_sz = last_ext->kcfg.data_off + last_ext->kcfg.sz;
err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG,
obj->efile.symbols_shndx,
NULL, map_sz);
@@ -1957,6 +1980,10 @@ static int parse_btf_map_def(struct bpf_object *obj,
return -EINVAL;
pr_debug("map '%s': found map_flags = %u.\n",
map->name, map->def.map_flags);
+ } else if (strcmp(name, "numa_node") == 0) {
+ if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ return -EINVAL;
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
@@ -2479,22 +2506,33 @@ static inline bool libbpf_prog_needs_vmlinux_btf(struct bpf_program *prog)
static int bpf_object__load_vmlinux_btf(struct bpf_object *obj)
{
+ bool need_vmlinux_btf = false;
struct bpf_program *prog;
int err;
+ /* CO-RE relocations need kernel BTF */
+ if (obj->btf_ext && obj->btf_ext->field_reloc_info.len)
+ need_vmlinux_btf = true;
+
bpf_object__for_each_program(prog, obj) {
+ if (!prog->load)
+ continue;
if (libbpf_prog_needs_vmlinux_btf(prog)) {
- obj->btf_vmlinux = libbpf_find_kernel_btf();
- if (IS_ERR(obj->btf_vmlinux)) {
- err = PTR_ERR(obj->btf_vmlinux);
- pr_warn("Error loading vmlinux BTF: %d\n", err);
- obj->btf_vmlinux = NULL;
- return err;
- }
- return 0;
+ need_vmlinux_btf = true;
+ break;
}
}
+ if (!need_vmlinux_btf)
+ return 0;
+
+ obj->btf_vmlinux = libbpf_find_kernel_btf();
+ if (IS_ERR(obj->btf_vmlinux)) {
+ err = PTR_ERR(obj->btf_vmlinux);
+ pr_warn("Error loading vmlinux BTF: %d\n", err);
+ obj->btf_vmlinux = NULL;
+ return err;
+ }
return 0;
}
@@ -2709,8 +2747,33 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
return -ENOENT;
}
-static enum extern_type find_extern_type(const struct btf *btf, int id,
- bool *is_signed)
+static int find_extern_sec_btf_id(struct btf *btf, int ext_btf_id) {
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *t;
+ int i, j, n;
+
+ if (!btf)
+ return -ESRCH;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vs = btf_var_secinfos(t);
+ for (j = 0; j < btf_vlen(t); j++, vs++) {
+ if (vs->type == ext_btf_id)
+ return i;
+ }
+ }
+
+ return -ENOENT;
+}
+
+static enum kcfg_type find_kcfg_type(const struct btf *btf, int id,
+ bool *is_signed)
{
const struct btf_type *t;
const char *name;
@@ -2725,29 +2788,29 @@ static enum extern_type find_extern_type(const struct btf *btf, int id,
int enc = btf_int_encoding(t);
if (enc & BTF_INT_BOOL)
- return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN;
+ return t->size == 1 ? KCFG_BOOL : KCFG_UNKNOWN;
if (is_signed)
*is_signed = enc & BTF_INT_SIGNED;
if (t->size == 1)
- return EXT_CHAR;
+ return KCFG_CHAR;
if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1)))
- return EXT_UNKNOWN;
- return EXT_INT;
+ return KCFG_UNKNOWN;
+ return KCFG_INT;
}
case BTF_KIND_ENUM:
if (t->size != 4)
- return EXT_UNKNOWN;
+ return KCFG_UNKNOWN;
if (strcmp(name, "libbpf_tristate"))
- return EXT_UNKNOWN;
- return EXT_TRISTATE;
+ return KCFG_UNKNOWN;
+ return KCFG_TRISTATE;
case BTF_KIND_ARRAY:
if (btf_array(t)->nelems == 0)
- return EXT_UNKNOWN;
- if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR)
- return EXT_UNKNOWN;
- return EXT_CHAR_ARR;
+ return KCFG_UNKNOWN;
+ if (find_kcfg_type(btf, btf_array(t)->type, NULL) != KCFG_CHAR)
+ return KCFG_UNKNOWN;
+ return KCFG_CHAR_ARR;
default:
- return EXT_UNKNOWN;
+ return KCFG_UNKNOWN;
}
}
@@ -2756,23 +2819,45 @@ static int cmp_externs(const void *_a, const void *_b)
const struct extern_desc *a = _a;
const struct extern_desc *b = _b;
- /* descending order by alignment requirements */
- if (a->align != b->align)
- return a->align > b->align ? -1 : 1;
- /* ascending order by size, within same alignment class */
- if (a->sz != b->sz)
- return a->sz < b->sz ? -1 : 1;
+ if (a->type != b->type)
+ return a->type < b->type ? -1 : 1;
+
+ if (a->type == EXT_KCFG) {
+ /* descending order by alignment requirements */
+ if (a->kcfg.align != b->kcfg.align)
+ return a->kcfg.align > b->kcfg.align ? -1 : 1;
+ /* ascending order by size, within same alignment class */
+ if (a->kcfg.sz != b->kcfg.sz)
+ return a->kcfg.sz < b->kcfg.sz ? -1 : 1;
+ }
+
/* resolve ties by name */
return strcmp(a->name, b->name);
}
+static int find_int_btf_id(const struct btf *btf)
+{
+ const struct btf_type *t;
+ int i, n;
+
+ n = btf__get_nr_types(btf);
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(btf, i);
+
+ if (btf_is_int(t) && btf_int_bits(t) == 32)
+ return i;
+ }
+
+ return 0;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
+ struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off, btf_id;
- struct btf_type *sec;
- const char *ext_name;
+ int i, n, off;
+ const char *ext_name, *sec_name;
Elf_Scn *scn;
GElf_Shdr sh;
@@ -2818,22 +2903,50 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext->name = btf__name_by_offset(obj->btf, t->name_off);
ext->sym_idx = i;
ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK;
- ext->sz = btf__resolve_size(obj->btf, t->type);
- if (ext->sz <= 0) {
- pr_warn("failed to resolve size of extern '%s': %d\n",
- ext_name, ext->sz);
- return ext->sz;
- }
- ext->align = btf__align_of(obj->btf, t->type);
- if (ext->align <= 0) {
- pr_warn("failed to determine alignment of extern '%s': %d\n",
- ext_name, ext->align);
- return -EINVAL;
- }
- ext->type = find_extern_type(obj->btf, t->type,
- &ext->is_signed);
- if (ext->type == EXT_UNKNOWN) {
- pr_warn("extern '%s' type is unsupported\n", ext_name);
+
+ ext->sec_btf_id = find_extern_sec_btf_id(obj->btf, ext->btf_id);
+ if (ext->sec_btf_id <= 0) {
+ pr_warn("failed to find BTF for extern '%s' [%d] section: %d\n",
+ ext_name, ext->btf_id, ext->sec_btf_id);
+ return ext->sec_btf_id;
+ }
+ sec = (void *)btf__type_by_id(obj->btf, ext->sec_btf_id);
+ sec_name = btf__name_by_offset(obj->btf, sec->name_off);
+
+ if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ kcfg_sec = sec;
+ ext->type = EXT_KCFG;
+ ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
+ if (ext->kcfg.sz <= 0) {
+ pr_warn("failed to resolve size of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.sz);
+ return ext->kcfg.sz;
+ }
+ ext->kcfg.align = btf__align_of(obj->btf, t->type);
+ if (ext->kcfg.align <= 0) {
+ pr_warn("failed to determine alignment of extern (kcfg) '%s': %d\n",
+ ext_name, ext->kcfg.align);
+ return -EINVAL;
+ }
+ ext->kcfg.type = find_kcfg_type(obj->btf, t->type,
+ &ext->kcfg.is_signed);
+ if (ext->kcfg.type == KCFG_UNKNOWN) {
+ pr_warn("extern (kcfg) '%s' type is unsupported\n", ext_name);
+ return -ENOTSUP;
+ }
+ } else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+ const struct btf_type *vt;
+
+ ksym_sec = sec;
+ ext->type = EXT_KSYM;
+
+ vt = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ if (!btf_is_void(vt)) {
+ pr_warn("extern (ksym) '%s' is not typeless (void)\n", ext_name);
+ return -ENOTSUP;
+ }
+ } else {
+ pr_warn("unrecognized extern section '%s'\n", sec_name);
return -ENOTSUP;
}
}
@@ -2842,42 +2955,80 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (!obj->nr_extern)
return 0;
- /* sort externs by (alignment, size, name) and calculate their offsets
- * within a map */
+ /* sort externs by type, for kcfg ones also by (align, size, name) */
qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs);
- off = 0;
- for (i = 0; i < obj->nr_extern; i++) {
- ext = &obj->externs[i];
- ext->data_off = roundup(off, ext->align);
- off = ext->data_off + ext->sz;
- pr_debug("extern #%d: symbol %d, off %u, name %s\n",
- i, ext->sym_idx, ext->data_off, ext->name);
- }
- btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC);
- if (btf_id <= 0) {
- pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC);
- return -ESRCH;
- }
+ /* for .ksyms section, we need to turn all externs into allocated
+ * variables in BTF to pass kernel verification; we do this by
+ * pretending that each extern is a 8-byte variable
+ */
+ if (ksym_sec) {
+ /* find existing 4-byte integer type in BTF to use for fake
+ * extern variables in DATASEC
+ */
+ int int_btf_id = find_int_btf_id(obj->btf);
- sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id);
- sec->size = off;
- n = btf_vlen(sec);
- for (i = 0; i < n; i++) {
- struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
-
- t = btf__type_by_id(obj->btf, vs->type);
- ext_name = btf__name_by_offset(obj->btf, t->name_off);
- ext = find_extern_by_name(obj, ext_name);
- if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
- return -ESRCH;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM)
+ continue;
+ pr_debug("extern (ksym) #%d: symbol %d, name %s\n",
+ i, ext->sym_idx, ext->name);
+ }
+
+ sec = ksym_sec;
+ n = btf_vlen(sec);
+ for (i = 0, off = 0; i < n; i++, off += sizeof(int)) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+ struct btf_type *vt;
+
+ vt = (void *)btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, vt->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ vs->offset = off;
+ vs->size = sizeof(int);
}
- vs->offset = ext->data_off;
- btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ sec->size = off;
}
+ if (kcfg_sec) {
+ sec = kcfg_sec;
+ /* for kcfg externs calculate their offsets within a .kconfig map */
+ off = 0;
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KCFG)
+ continue;
+
+ ext->kcfg.data_off = roundup(off, ext->kcfg.align);
+ off = ext->kcfg.data_off + ext->kcfg.sz;
+ pr_debug("extern (kcfg) #%d: symbol %d, off %u, name %s\n",
+ i, ext->sym_idx, ext->kcfg.data_off, ext->name);
+ }
+ sec->size = off;
+ n = btf_vlen(sec);
+ for (i = 0; i < n; i++) {
+ struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i;
+
+ t = btf__type_by_id(obj->btf, vs->type);
+ ext_name = btf__name_by_offset(obj->btf, t->name_off);
+ ext = find_extern_by_name(obj, ext_name);
+ if (!ext) {
+ pr_warn("failed to find extern definition for BTF var '%s'\n",
+ ext_name);
+ return -ESRCH;
+ }
+ btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vs->offset = ext->kcfg.data_off;
+ }
+ }
return 0;
}
@@ -3007,11 +3158,11 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
sym_idx);
return -LIBBPF_ERRNO__RELOC;
}
- pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n",
- i, ext->name, ext->sym_idx, ext->data_off, insn_idx);
+ pr_debug("found extern #%d '%s' (sym %d) for insn %u\n",
+ i, ext->name, ext->sym_idx, insn_idx);
reloc_desc->type = RELO_EXTERN;
reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = ext->data_off;
+ reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
}
@@ -3222,20 +3373,27 @@ err_free_new_name:
return err;
}
-int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+__u32 bpf_map__max_entries(const struct bpf_map *map)
{
- if (!map || !max_entries)
- return -EINVAL;
+ return map->def.max_entries;
+}
- /* If map already created, its attributes can't be changed. */
+int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
+{
if (map->fd >= 0)
return -EBUSY;
-
map->def.max_entries = max_entries;
-
return 0;
}
+int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
+{
+ if (!map || !max_entries)
+ return -EINVAL;
+
+ return bpf_map__set_max_entries(map, max_entries);
+}
+
static int
bpf_object__probe_loading(struct bpf_object *obj)
{
@@ -3603,6 +3761,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
create_attr.map_flags = def->map_flags;
create_attr.key_size = def->key_size;
create_attr.value_size = def->value_size;
+ create_attr.numa_node = map->numa_node;
if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
int nr_cpus;
@@ -4799,8 +4958,8 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
if (targ_btf_path)
targ_btf = btf__parse_elf(targ_btf_path, NULL);
else
- targ_btf = libbpf_find_kernel_btf();
- if (IS_ERR(targ_btf)) {
+ targ_btf = obj->btf_vmlinux;
+ if (IS_ERR_OR_NULL(targ_btf)) {
pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));
return PTR_ERR(targ_btf);
}
@@ -4847,7 +5006,9 @@ bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)
}
out:
- btf__free(targ_btf);
+ /* obj->btf_vmlinux is freed at the end of object load phase */
+ if (targ_btf != obj->btf_vmlinux)
+ btf__free(targ_btf);
if (!IS_ERR_OR_NULL(cand_cache)) {
hashmap__for_each_entry(cand_cache, entry, i) {
bpf_core_free_cands(entry->value);
@@ -4934,6 +5095,7 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
for (i = 0; i < prog->nr_reloc; i++) {
struct reloc_desc *relo = &prog->reloc_desc[i];
struct bpf_insn *insn = &prog->insns[relo->insn_idx];
+ struct extern_desc *ext;
if (relo->insn_idx + 1 >= (int)prog->insns_cnt) {
pr_warn("relocation out of range: '%s'\n",
@@ -4952,9 +5114,15 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)
insn[0].imm = obj->maps[relo->map_idx].fd;
break;
case RELO_EXTERN:
- insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
- insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
- insn[1].imm = relo->sym_off;
+ ext = &obj->externs[relo->sym_off];
+ if (ext->type == EXT_KCFG) {
+ insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
+ insn[0].imm = obj->maps[obj->kconfig_map_idx].fd;
+ insn[1].imm = ext->kcfg.data_off;
+ } else /* EXT_KSYM */ {
+ insn[0].imm = (__u32)ext->ksym.addr;
+ insn[1].imm = ext->ksym.addr >> 32;
+ }
break;
case RELO_CALL:
err = bpf_program__reloc_text(prog, obj, relo);
@@ -5287,6 +5455,12 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)
{
int err = 0, fd, i, btf_id;
+ if (prog->obj->loaded) {
+ pr_warn("prog '%s'('%s'): can't load after object was loaded\n",
+ prog->name, prog->section_name);
+ return -EINVAL;
+ }
+
if ((prog->type == BPF_PROG_TYPE_TRACING ||
prog->type == BPF_PROG_TYPE_LSM ||
prog->type == BPF_PROG_TYPE_EXT) && !prog->attach_btf_id) {
@@ -5375,16 +5549,21 @@ static bool bpf_program__is_function_storage(const struct bpf_program *prog,
static int
bpf_object__load_progs(struct bpf_object *obj, int log_level)
{
+ struct bpf_program *prog;
size_t i;
int err;
for (i = 0; i < obj->nr_programs; i++) {
- if (bpf_program__is_function_storage(&obj->programs[i], obj))
+ prog = &obj->programs[i];
+ if (bpf_program__is_function_storage(prog, obj))
continue;
- obj->programs[i].log_level |= log_level;
- err = bpf_program__load(&obj->programs[i],
- obj->license,
- obj->kern_version);
+ if (!prog->load) {
+ pr_debug("prog '%s'('%s'): skipped loading\n",
+ prog->name, prog->section_name);
+ continue;
+ }
+ prog->log_level |= log_level;
+ err = bpf_program__load(prog, obj->license, obj->kern_version);
if (err)
return err;
}
@@ -5573,56 +5752,114 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)
return 0;
}
+static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
+{
+ char sym_type, sym_name[500];
+ unsigned long long sym_addr;
+ struct extern_desc *ext;
+ int ret, err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f) {
+ err = -errno;
+ pr_warn("failed to open /proc/kallsyms: %d\n", err);
+ return err;
+ }
+
+ while (true) {
+ ret = fscanf(f, "%llx %c %499s%*[^\n]\n",
+ &sym_addr, &sym_type, sym_name);
+ if (ret == EOF && feof(f))
+ break;
+ if (ret != 3) {
+ pr_warn("failed to read kallsyms entry: %d\n", ret);
+ err = -EINVAL;
+ goto out;
+ }
+
+ ext = find_extern_by_name(obj, sym_name);
+ if (!ext || ext->type != EXT_KSYM)
+ continue;
+
+ if (ext->is_set && ext->ksym.addr != sym_addr) {
+ pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
+ sym_name, ext->ksym.addr, sym_addr);
+ err = -EINVAL;
+ goto out;
+ }
+ if (!ext->is_set) {
+ ext->is_set = true;
+ ext->ksym.addr = sym_addr;
+ pr_debug("extern (ksym) %s=0x%llx\n", sym_name, sym_addr);
+ }
+ }
+
+out:
+ fclose(f);
+ return err;
+}
+
static int bpf_object__resolve_externs(struct bpf_object *obj,
const char *extra_kconfig)
{
- bool need_config = false;
+ bool need_config = false, need_kallsyms = false;
struct extern_desc *ext;
+ void *kcfg_data = NULL;
int err, i;
- void *data;
if (obj->nr_extern == 0)
return 0;
- data = obj->maps[obj->kconfig_map_idx].mmaped;
+ if (obj->kconfig_map_idx >= 0)
+ kcfg_data = obj->maps[obj->kconfig_map_idx].mmaped;
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
- void *ext_val = data + ext->data_off;
+ if (ext->type == EXT_KCFG &&
+ strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) {
+ void *ext_val = kcfg_data + ext->kcfg.data_off;
__u32 kver = get_kernel_version();
if (!kver) {
pr_warn("failed to get kernel version\n");
return -EINVAL;
}
- err = set_ext_value_num(ext, ext_val, kver);
+ err = set_kcfg_value_num(ext, ext_val, kver);
if (err)
return err;
- pr_debug("extern %s=0x%x\n", ext->name, kver);
- } else if (strncmp(ext->name, "CONFIG_", 7) == 0) {
+ pr_debug("extern (kcfg) %s=0x%x\n", ext->name, kver);
+ } else if (ext->type == EXT_KCFG &&
+ strncmp(ext->name, "CONFIG_", 7) == 0) {
need_config = true;
+ } else if (ext->type == EXT_KSYM) {
+ need_kallsyms = true;
} else {
pr_warn("unrecognized extern '%s'\n", ext->name);
return -EINVAL;
}
}
if (need_config && extra_kconfig) {
- err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data);
+ err = bpf_object__read_kconfig_mem(obj, extra_kconfig, kcfg_data);
if (err)
return -EINVAL;
need_config = false;
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
- if (!ext->is_set) {
+ if (ext->type == EXT_KCFG && !ext->is_set) {
need_config = true;
break;
}
}
}
if (need_config) {
- err = bpf_object__read_kconfig_file(obj, data);
+ err = bpf_object__read_kconfig_file(obj, kcfg_data);
+ if (err)
+ return -EINVAL;
+ }
+ if (need_kallsyms) {
+ err = bpf_object__read_kallsyms_file(obj);
if (err)
return -EINVAL;
}
@@ -5653,12 +5890,10 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
return -EINVAL;
if (obj->loaded) {
- pr_warn("object should not be loaded twice\n");
+ pr_warn("object '%s': load can't be attempted twice\n", obj->name);
return -EINVAL;
}
- obj->loaded = true;
-
err = bpf_object__probe_loading(obj);
err = err ? : bpf_object__probe_caps(obj);
err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
@@ -5673,6 +5908,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
btf__free(obj->btf_vmlinux);
obj->btf_vmlinux = NULL;
+ obj->loaded = true; /* doesn't matter if successfully or not */
+
if (err)
goto out;
@@ -6445,6 +6682,20 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)
return title;
}
+bool bpf_program__autoload(const struct bpf_program *prog)
+{
+ return prog->load;
+}
+
+int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
+{
+ if (prog->obj->loaded)
+ return -EINVAL;
+
+ prog->load = autoload;
+ return 0;
+}
+
int bpf_program__fd(const struct bpf_program *prog)
{
return bpf_program__nth_fd(prog, 0);
@@ -7094,6 +7345,71 @@ const char *bpf_map__name(const struct bpf_map *map)
return map ? map->name : NULL;
}
+enum bpf_map_type bpf_map__type(const struct bpf_map *map)
+{
+ return map->def.type;
+}
+
+int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.type = type;
+ return 0;
+}
+
+__u32 bpf_map__map_flags(const struct bpf_map *map)
+{
+ return map->def.map_flags;
+}
+
+int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.map_flags = flags;
+ return 0;
+}
+
+__u32 bpf_map__numa_node(const struct bpf_map *map)
+{
+ return map->numa_node;
+}
+
+int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->numa_node = numa_node;
+ return 0;
+}
+
+__u32 bpf_map__key_size(const struct bpf_map *map)
+{
+ return map->def.key_size;
+}
+
+int bpf_map__set_key_size(struct bpf_map *map, __u32 size)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.key_size = size;
+ return 0;
+}
+
+__u32 bpf_map__value_size(const struct bpf_map *map)
+{
+ return map->def.value_size;
+}
+
+int bpf_map__set_value_size(struct bpf_map *map, __u32 size)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
+ map->def.value_size = size;
+ return 0;
+}
+
__u32 bpf_map__btf_key_type_id(const struct bpf_map *map)
{
return map ? map->btf_key_type_id : 0;
@@ -7146,9 +7462,17 @@ bool bpf_map__is_internal(const struct bpf_map *map)
return map->libbpf_type != LIBBPF_MAP_UNSPEC;
}
-void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+__u32 bpf_map__ifindex(const struct bpf_map *map)
{
+ return map->map_ifindex;
+}
+
+int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)
+{
+ if (map->fd >= 0)
+ return -EBUSY;
map->map_ifindex = ifindex;
+ return 0;
}
int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
@@ -8994,6 +9318,9 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
const struct bpf_sec_def *sec_def;
const char *sec_name = bpf_program__title(prog, false);
+ if (!prog->load)
+ continue;
+
sec_def = find_sec_def(sec_name);
if (!sec_def || !sec_def->attach_fn)
continue;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 334437af3014..2335971ed0bd 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -200,6 +200,8 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,
LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
bool needs_copy);
+LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
+LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
/* returns program size in bytes */
LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
@@ -418,11 +420,38 @@ bpf_map__next(const struct bpf_map *map, const struct bpf_object *obj);
LIBBPF_API struct bpf_map *
bpf_map__prev(const struct bpf_map *map, const struct bpf_object *obj);
+/* get/set map FD */
LIBBPF_API int bpf_map__fd(const struct bpf_map *map);
+LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
+/* get map definition */
LIBBPF_API const struct bpf_map_def *bpf_map__def(const struct bpf_map *map);
+/* get map name */
LIBBPF_API const char *bpf_map__name(const struct bpf_map *map);
+/* get/set map type */
+LIBBPF_API enum bpf_map_type bpf_map__type(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_type(struct bpf_map *map, enum bpf_map_type type);
+/* get/set map size (max_entries) */
+LIBBPF_API __u32 bpf_map__max_entries(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries);
+LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
+/* get/set map flags */
+LIBBPF_API __u32 bpf_map__map_flags(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_map_flags(struct bpf_map *map, __u32 flags);
+/* get/set map NUMA node */
+LIBBPF_API __u32 bpf_map__numa_node(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_numa_node(struct bpf_map *map, __u32 numa_node);
+/* get/set map key size */
+LIBBPF_API __u32 bpf_map__key_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_key_size(struct bpf_map *map, __u32 size);
+/* get/set map value size */
+LIBBPF_API __u32 bpf_map__value_size(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_value_size(struct bpf_map *map, __u32 size);
+/* get map key/value BTF type IDs */
LIBBPF_API __u32 bpf_map__btf_key_type_id(const struct bpf_map *map);
LIBBPF_API __u32 bpf_map__btf_value_type_id(const struct bpf_map *map);
+/* get/set map if_index */
+LIBBPF_API __u32 bpf_map__ifindex(const struct bpf_map *map);
+LIBBPF_API int bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
typedef void (*bpf_map_clear_priv_t)(struct bpf_map *, void *);
LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
@@ -430,11 +459,8 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv,
LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);
LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,
const void *data, size_t size);
-LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd);
-LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);
LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);
-LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex);
LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path);
LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index f732c77b7ed0..6544d2cd1ed6 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -270,3 +270,22 @@ LIBBPF_0.0.9 {
ring_buffer__new;
ring_buffer__poll;
} LIBBPF_0.0.8;
+
+LIBBPF_0.1.0 {
+ global:
+ bpf_map__ifindex;
+ bpf_map__key_size;
+ bpf_map__map_flags;
+ bpf_map__max_entries;
+ bpf_map__numa_node;
+ bpf_map__set_key_size;
+ bpf_map__set_map_flags;
+ bpf_map__set_max_entries;
+ bpf_map__set_numa_node;
+ bpf_map__set_type;
+ bpf_map__set_value_size;
+ bpf_map__type;
+ bpf_map__value_size;
+ bpf_program__autoload;
+ bpf_program__set_autoload;
+} LIBBPF_0.0.9;
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 22aaec74ea0a..1f9c696b3edf 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -134,12 +134,12 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
$(call msg,CC,,$@)
$(CC) -c $(CFLAGS) -o $@ $<
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
/sys/kernel/btf/vmlinux \
/boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
$(OUTPUT)/runqslower: $(BPFOBJ)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
@@ -182,8 +182,13 @@ $(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
mkdir -p $@
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(call msg,CP,,$@)
+ cp "$(VMLINUX_H)" $@
+endif
# Get Clang's default includes on this system, as opposed to those seen by
# '-target bpf'. This fixes "missing" files on some architectures/distros,
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index e36dd1a1780d..acd08715be2e 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -7,8 +7,6 @@
#include <arpa/inet.h>
-#include <sys/epoll.h>
-
#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
@@ -17,8 +15,13 @@
#include "network_helpers.h"
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
- __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_err(MSG, ...) ({ \
+ int __save = errno; \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), \
+ ##__VA_ARGS__); \
+ errno = __save; \
+})
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -37,7 +40,34 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
-int start_server_with_port(int family, int type, __u16 port)
+static int settimeo(int fd, int timeout_ms)
+{
+ struct timeval timeout = { .tv_sec = 3 };
+
+ if (timeout_ms > 0) {
+ timeout.tv_sec = timeout_ms / 1000;
+ timeout.tv_usec = (timeout_ms % 1000) * 1000;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_RCVTIMEO");
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_SNDTIMEO");
+ return -1;
+ }
+
+ return 0;
+}
+
+#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
{
struct sockaddr_storage addr = {};
socklen_t len;
@@ -48,120 +78,119 @@ int start_server_with_port(int family, int type, __u16 port)
sin->sin_family = AF_INET;
sin->sin_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+ log_err("inet_pton(AF_INET, %s)", addr_str);
+ return -1;
+ }
len = sizeof(*sin);
} else {
struct sockaddr_in6 *sin6 = (void *)&addr;
sin6->sin6_family = AF_INET6;
sin6->sin6_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+ log_err("inet_pton(AF_INET6, %s)", addr_str);
+ return -1;
+ }
len = sizeof(*sin6);
}
- fd = socket(family, type | SOCK_NONBLOCK, 0);
+ fd = socket(family, type, 0);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
log_err("Failed to bind socket");
- close(fd);
- return -1;
+ goto error_close;
}
if (type == SOCK_STREAM) {
if (listen(fd, 1) < 0) {
log_err("Failed to listed on socket");
- close(fd);
- return -1;
+ goto error_close;
}
}
return fd;
-}
-int start_server(int family, int type)
-{
- return start_server_with_port(family, type, 0);
+error_close:
+ save_errno_close(fd);
+ return -1;
}
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
-
-int connect_to_fd(int family, int type, int server_fd)
+static int connect_fd_to_addr(int fd,
+ const struct sockaddr_storage *addr,
+ socklen_t addrlen)
{
- int fd, save_errno;
-
- fd = socket(family, type, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
+ if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
+ log_err("Failed to connect to server");
return -1;
}
- if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
- save_errno = errno;
- close(fd);
- errno = save_errno;
- return -1;
- }
-
- return fd;
+ return 0;
}
-int connect_fd_to_fd(int client_fd, int server_fd)
+int connect_to_fd(int server_fd, int timeout_ms)
{
struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int save_errno;
+ struct sockaddr_in *addr_in;
+ socklen_t addrlen, optlen;
+ int fd, type;
- if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen)) {
- log_err("Failed to set SO_RCVTIMEO");
+ optlen = sizeof(type);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+ log_err("getsockopt(SOL_TYPE)");
return -1;
}
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ addrlen = sizeof(addr);
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
log_err("Failed to get server addr");
return -1;
}
- if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
- if (errno != EINPROGRESS) {
- save_errno = errno;
- log_err("Failed to connect to server");
- errno = save_errno;
- }
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
return -1;
}
- return 0;
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ if (connect_fd_to_addr(fd, &addr, addrlen))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
}
-int connect_wait(int fd)
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
{
- struct epoll_event ev = {}, events[2];
- int timeout_ms = 1000;
- int efd, nfd;
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
- efd = epoll_create1(EPOLL_CLOEXEC);
- if (efd < 0) {
- log_err("Failed to open epoll fd");
+ if (settimeo(client_fd, timeout_ms))
return -1;
- }
-
- ev.events = EPOLLRDHUP | EPOLLOUT;
- ev.data.fd = fd;
- if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
- log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
- close(efd);
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
return -1;
}
- nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
- if (nfd < 0)
- log_err("Failed to wait for I/O event on epoll fd=%d", efd);
+ if (connect_fd_to_addr(client_fd, &addr, len))
+ return -1;
- close(efd);
- return nfd;
+ return 0;
}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 6a8009605670..f580e82fda58 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -33,10 +33,9 @@ struct ipv6_packet {
} __packed;
extern struct ipv6_packet pkt_v6;
-int start_server(int family, int type);
-int start_server_with_port(int family, int type, __u16 port);
-int connect_to_fd(int family, int type, int server_fd);
-int connect_fd_to_fd(int client_fd, int server_fd);
-int connect_wait(int client_fd);
+int start_server(int family, int type, const char *addr, __u16 port,
+ int timeout_ms);
+int connect_to_fd(int server_fd, int timeout_ms);
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c
new file mode 100644
index 000000000000..3693f7d133eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoload.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_autoload.skel.h"
+
+void test_autoload(void)
+{
+ int duration = 0, err;
+ struct test_autoload* skel;
+
+ skel = test_autoload__open_and_load();
+ /* prog3 should be broken */
+ if (CHECK(skel, "skel_open_and_load", "unexpected success\n"))
+ goto cleanup;
+
+ skel = test_autoload__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ goto cleanup;
+
+ /* don't load prog3 */
+ bpf_program__set_autoload(skel->progs.prog3, false);
+
+ err = test_autoload__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ err = test_autoload__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(!skel->bss->prog1_called, "prog1", "not called\n");
+ CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+ CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+cleanup:
+ test_autoload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf9..fed42755416d 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -5,7 +5,12 @@
#include "bpf_iter_netlink.skel.h"
#include "bpf_iter_bpf_map.skel.h"
#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
@@ -106,6 +111,20 @@ static void test_task(void)
bpf_iter_task__destroy(skel);
}
+static void test_task_stack(void)
+{
+ struct bpf_iter_task_stack *skel;
+
+ skel = bpf_iter_task_stack__open_and_load();
+ if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task_stack);
+
+ bpf_iter_task_stack__destroy(skel);
+}
+
static void test_task_file(void)
{
struct bpf_iter_task_file *skel;
@@ -120,6 +139,62 @@ static void test_task_file(void)
bpf_iter_task_file__destroy(skel);
}
+static void test_tcp4(void)
+{
+ struct bpf_iter_tcp4 *skel;
+
+ skel = bpf_iter_tcp4__open_and_load();
+ if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_tcp4);
+
+ bpf_iter_tcp4__destroy(skel);
+}
+
+static void test_tcp6(void)
+{
+ struct bpf_iter_tcp6 *skel;
+
+ skel = bpf_iter_tcp6__open_and_load();
+ if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_tcp6);
+
+ bpf_iter_tcp6__destroy(skel);
+}
+
+static void test_udp4(void)
+{
+ struct bpf_iter_udp4 *skel;
+
+ skel = bpf_iter_udp4__open_and_load();
+ if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_udp4);
+
+ bpf_iter_udp4__destroy(skel);
+}
+
+static void test_udp6(void)
+{
+ struct bpf_iter_udp6 *skel;
+
+ skel = bpf_iter_udp6__open_and_load();
+ if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
+ "skeleton open_and_load failed\n"))
+ return;
+
+ do_dummy_read(skel->progs.dump_udp6);
+
+ bpf_iter_udp6__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
@@ -392,8 +467,18 @@ void test_bpf_iter(void)
test_bpf_map();
if (test__start_subtest("task"))
test_task();
+ if (test__start_subtest("task_stack"))
+ test_task_stack();
if (test__start_subtest("task_file"))
test_task_file();
+ if (test__start_subtest("tcp4"))
+ test_tcp4();
+ if (test__start_subtest("tcp6"))
+ test_tcp6();
+ if (test__start_subtest("udp4"))
+ test_udp4();
+ if (test__start_subtest("udp6"))
+ test_udp6();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 059047af7df3..464edc1c1708 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
socklen_t addr_len = sizeof(addr);
__u32 duration = 0;
- serv_sk = start_server(AF_INET6, SOCK_STREAM);
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
return;
@@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
*g_serv_port = addr.sin6_port;
/* Client outside of test cgroup should fail to connect by timeout. */
- err = connect_fd_to_fd(out_sk, serv_sk);
+ err = connect_fd_to_fd(out_sk, serv_sk, 1000);
if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
"unexpected result err %d errno %d\n", err, errno))
goto cleanup;
- err = connect_wait(out_sk);
- if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
- goto cleanup;
-
/* Client inside test cgroup should connect just fine. */
- in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+ in_sk = connect_to_fd(serv_sk, 0);
if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
goto cleanup;
@@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void)
* differs from that of testing cgroup. Moving selftests process to
* testing cgroup won't change cgroup id of an already created socket.
*/
- out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ out_sk = socket(AF_INET6, SOCK_STREAM, 0);
if (CHECK_FAIL(out_sk < 0))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 17bbf76812ca..9229db2f5ca5 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -114,7 +114,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- fd = connect_to_fd(family, type, server_fd);
+ fd = connect_to_fd(server_fd, 0);
if (fd < 0) {
err = -1;
goto close_bpf_object;
@@ -137,25 +137,25 @@ void test_connect_force_port(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+ server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c
new file mode 100644
index 000000000000..1a11612ace6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/endian.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_endian.skel.h"
+
+static int duration;
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+#define OUT16 0x3412
+#define OUT32 0x78563412U
+#define OUT64 0xf0debc9a78563412ULL
+
+void test_endian(void)
+{
+ struct test_endian* skel;
+ struct test_endian__bss *bss;
+ int err;
+
+ skel = test_endian__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+
+ bss->in16 = IN16;
+ bss->in32 = IN32;
+ bss->in64 = IN64;
+
+ err = test_endian__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out16, (__u64)OUT16);
+ CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out32, (__u64)OUT32);
+ CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out64, (__u64)OUT64);
+
+ CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const16, (__u64)OUT16);
+ CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const32, (__u64)OUT32);
+ CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const64, (__u64)OUT64);
+cleanup:
+ test_endian__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
new file mode 100644
index 000000000000..e3d6777226a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_ksyms.skel.h"
+#include <sys/stat.h>
+
+static int duration;
+
+static __u64 kallsyms_find(const char *sym)
+{
+ char type, name[500];
+ __u64 addr, res = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return 0;
+
+ while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
+ if (strcmp(name, sym) == 0) {
+ res = addr;
+ goto out;
+ }
+ }
+
+ CHECK(false, "not_found", "symbol %s not found\n", sym);
+out:
+ fclose(f);
+ return res;
+}
+
+void test_ksyms(void)
+{
+ __u64 link_fops_addr = kallsyms_find("bpf_link_fops");
+ const char *btf_path = "/sys/kernel/btf/vmlinux";
+ struct test_ksyms *skel;
+ struct test_ksyms__data *data;
+ struct stat st;
+ __u64 btf_size;
+ int err;
+
+ if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ return;
+ btf_size = st.st_size;
+
+ skel = test_ksyms__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ return;
+
+ err = test_ksyms__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
+ "got 0x%llx, exp 0x%llx\n",
+ data->out__bpf_link_fops, link_fops_addr);
+ CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
+ "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
+ CHECK(data->out__btf_size != btf_size, "btf_size",
+ "got %llu, exp %llu\n", data->out__btf_size, btf_size);
+ CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
+ "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+
+cleanup:
+ test_ksyms__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index c1168e4a9036..5a2a689dbb68 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -23,7 +23,7 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
@@ -49,7 +49,7 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(err))
goto close_bpf_object;
- client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (CHECK_FAIL(client_fd < 0))
goto close_bpf_object;
close(client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
new file mode 100644
index 000000000000..c230a573c373
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_ptr_kern.skel.h"
+
+void test_map_ptr(void)
+{
+ struct map_ptr_kern *skel;
+ __u32 duration = 0, retval;
+ char buf[128];
+ int err;
+
+ skel = map_ptr_kern__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ return;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
+ sizeof(pkt_v4), buf, NULL, &retval, NULL);
+
+ if (CHECK(err, "test_run", "err=%d errno=%d\n", err, errno))
+ goto cleanup;
+
+ if (CHECK(!retval, "retval", "retval=%d map_type=%u line=%u\n", retval,
+ skel->bss->g_map_type, skel->bss->g_line))
+ goto cleanup;
+
+cleanup:
+ map_ptr_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 9013a0c01eed..d207e968e6b1 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -118,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
goto close_bpf_object;
}
- client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (client_fd < 0) {
err = -1;
goto close_bpf_object;
@@ -161,7 +161,7 @@ void test_tcp_rtt(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..c75525eab02c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+#define CHECK_VAL(got, exp) \
+ CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+ (long)(got), (long)(exp))
+
+void test_varlen(void)
+{
+ int duration = 0, err;
+ struct test_varlen* skel;
+ struct test_varlen__bss *bss;
+ struct test_varlen__data *data;
+ const char str1[] = "Hello, ";
+ const char str2[] = "World!";
+ const char exp_str[] = "Hello, \0World!\0";
+ const int size1 = sizeof(str1);
+ const int size2 = sizeof(str2);
+
+ skel = test_varlen__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+ data = skel->data;
+
+ err = test_varlen__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ bss->test_pid = getpid();
+
+ /* trigger everything */
+ memcpy(bss->buf_in1, str1, size1);
+ memcpy(bss->buf_in2, str2, size2);
+ bss->capture = true;
+ usleep(1);
+ bss->capture = false;
+
+ CHECK_VAL(bss->payload1_len1, size1);
+ CHECK_VAL(bss->payload1_len2, size2);
+ CHECK_VAL(bss->total1, size1 + size2);
+ CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+
+ CHECK_VAL(data->payload2_len1, size1);
+ CHECK_VAL(data->payload2_len2, size2);
+ CHECK_VAL(data->total2, size1 + size2);
+ CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+
+ CHECK_VAL(data->payload3_len1, size1);
+ CHECK_VAL(data->payload3_len2, size2);
+ CHECK_VAL(data->total3, size1 + size2);
+ CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+
+ CHECK_VAL(data->payload4_len1, size1);
+ CHECK_VAL(data->payload4_len2, size2);
+ CHECK_VAL(data->total4, size1 + size2);
+ CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
+ "doesn't match!");
+cleanup:
+ test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..17db3bac518b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+#undef bpf_iter__tcp
+#undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+ struct bpf_iter_meta *meta;
+ struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+ struct bpf_iter_meta *meta;
+ struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ __u32 fd;
+ struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+ struct bpf_iter_meta *meta;
+ struct sock_common *sk_common;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+ struct tcp_sock tcp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+ struct bpf_iter_meta *meta;
+ struct udp_sock *udp_sk;
+ uid_t uid __attribute__((aligned(8)));
+ int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+ struct udp_sock udp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b57bd6fef208..08651b23edba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,27 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index c8e9ca74c87b..d58d9f1642b5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,35 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__ipv6_route
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
- struct bpf_iter_meta *meta;
- struct fib6_info *rt;
-} __attribute__((preserve_access_index));
-
char _license[] SEC("license") = "GPL";
extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
-#define RTF_GATEWAY 0x0002
-#define IFNAMSIZ 16
-#define fib_nh_gw_family nh_common.nhc_gw_family
-#define fib_nh_gw6 nh_common.nhc_gw.ipv6
-#define fib_nh_dev nh_common.nhc_dev
-
SEC("iter/ipv6_route")
int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 75ecf956a2df..b9c2756c5c97 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,11 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__netlink
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index ee754021f98e..4983087852a0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -1,27 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 0f0ec3db20ba..8b787baa2654 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,29 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task_file
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
- __u32 fd;
- struct file *file;
-} __attribute__((preserve_access_index));
-
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 000000000000..50e59a2e142e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH 64
+unsigned long entries[MAX_STACK_TRACE_DEPTH] = {};
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ long i, retlen;
+
+ if (task == (void *)0)
+ return 0;
+
+ retlen = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+ if (retlen < 0)
+ return 0;
+
+ BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+ retlen / SIZE_OF_ULONG);
+ for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+ if (retlen > i * SIZE_OF_ULONG)
+ BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+ }
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..30fd587cb325
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+ /* The implementation here tailored to a particular
+ * setting of USER_HZ.
+ */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+ if ((tick_nsec % user_hz_nsec) == 0) {
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->inet_conn;
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64();
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog;
+ } else {
+ rx_queue = tp->rcv_nxt - tp->copied_seq;
+ if (rx_queue < 0)
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, destp, destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->write_seq - tp->snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen
+ : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64();
+ dest = tw->tw_daddr;
+ src = tw->tw_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64();
+
+ if (ttd < 0)
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, irsk->ir_loc_addr,
+ irsk->ir_num, irsk->ir_rmt_addr,
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "rem_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET)
+ return 0;
+
+ tp = bpf_skc_to_tcp_sock(sk_common);
+ if (tp)
+ return dump_tcp_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..10dec4392031
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,250 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+ /* The implementation here tailored to a particular
+ * setting of USER_HZ.
+ */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+ if ((tick_nsec % user_hz_nsec) == 0) {
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct in6_addr *dest, *src;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->tcp.inet_conn;
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64();
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog;
+ } else {
+ rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+ if (rx_queue < 0)
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->tcp.snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen
+ : (tcp_in_initial_slowstart(&tp->tcp) ? -1
+ : tp->tcp.snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64();
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ struct in6_addr *src, *dest;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64();
+ src = &irsk->ir_v6_loc_addr;
+ dest = &irsk->ir_v6_rmt_addr;
+
+ if (ttd < 0)
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ irsk->ir_num,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp6_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ tp = bpf_skc_to_tcp6_sock(sk_common);
+ if (tp)
+ return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 13c2c90c835f..2a4647f20c46 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,25 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index 0aa71b333cf3..ee49493dc125 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,25 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
__u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index dee1339e6905..d5e3df66ad9a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,27 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
int count = 0;
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..7053784575e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/udp")
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __be32 dest, src;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq,
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode ref pointer drops\n");
+
+ /* filter out udp6 sockets */
+ inet = &udp_sk->inet;
+ if (inet->sk.sk_family == AF_INET6)
+ return 0;
+
+ inet = &udp_sk->inet;
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ srcp = bpf_ntohs(inet->inet_sport);
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
+ ctx->bucket, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0,
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..c1175a6ecf43
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER \
+ " sl " \
+ "local_address " \
+ "remote_address " \
+ "st tx_queue rx_queue tr tm->when retrnsmt" \
+ " uid timeout inode ref pointer drops\n"
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/udp")
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ const struct in6_addr *dest, *src;
+ struct udp6_sock *udp6_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
+ udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
+ if (udp6_sk == (void *)0)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = bpf_ntohs(inet->inet_sport);
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+ dest = &inet->sk.sk_v6_daddr;
+ src = &inet->sk.sk_v6_rcv_saddr;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ ctx->bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0,
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
new file mode 100644
index 000000000000..01378911252b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
+#define AF_INET 2
+#define AF_INET6 10
+
+#define ICSK_TIME_RETRANS 1
+#define ICSK_TIME_PROBE0 3
+#define ICSK_TIME_LOSS_PROBE 5
+#define ICSK_TIME_REO_TIMEOUT 6
+
+#define IFNAMSIZ 16
+
+#define RTF_GATEWAY 0x0002
+
+#define TCP_INFINITE_SSTHRESH 0x7fffffff
+#define TCP_PINGPONG_THRESH 3
+
+#define fib_nh_dev nh_common.nhc_dev
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
+#define inet_dport sk.__sk_common.skc_dport
+
+#define ir_loc_addr req.__req_common.skc_rcv_saddr
+#define ir_num req.__req_common.skc_num
+#define ir_rmt_addr req.__req_common.skc_daddr
+#define ir_rmt_port req.__req_common.skc_dport
+#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+
+#define sk_family __sk_common.skc_family
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+#define sk_refcnt __sk_common.skc_refcnt
+#define sk_state __sk_common.skc_state
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+
+#define s6_addr32 in6_u.u6_addr32
+
+#define tw_daddr __tw_common.skc_daddr
+#define tw_rcv_saddr __tw_common.skc_rcv_saddr
+#define tw_dport __tw_common.skc_dport
+#define tw_refcnt __tw_common.skc_refcnt
+#define tw_v6_daddr __tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 1ab2c5eba86c..b1b2773c0b9d 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -104,6 +104,30 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int set_keepalive(struct bpf_sock_addr *ctx)
+{
+ int zero = 0, one = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
+ return 1;
+ if (ctx->type == SOCK_STREAM) {
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+ return 1;
+ }
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
+ return 1;
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -121,6 +145,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (bind_to_device(ctx))
return 0;
+ if (set_keepalive(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
new file mode 100644
index 000000000000..473665cac67e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -0,0 +1,686 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define LOOP_BOUND 0xf
+#define MAX_ENTRIES 8
+#define HALF_ENTRIES (MAX_ENTRIES >> 1)
+
+_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
+
+enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
+__u32 g_line = 0;
+
+#define VERIFY_TYPE(type, func) ({ \
+ g_map_type = type; \
+ if (!func()) \
+ return 0; \
+})
+
+
+#define VERIFY(expr) ({ \
+ g_line = __LINE__; \
+ if (!(expr)) \
+ return 0; \
+})
+
+struct bpf_map_memory {
+ __u32 pages;
+} __attribute__((preserve_access_index));
+
+struct bpf_map {
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 id;
+ struct bpf_map_memory memory;
+} __attribute__((preserve_access_index));
+
+static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
+ __u32 value_size, __u32 max_entries)
+{
+ VERIFY(map->map_type == g_map_type);
+ VERIFY(map->key_size == key_size);
+ VERIFY(map->value_size == value_size);
+ VERIFY(map->max_entries == max_entries);
+ VERIFY(map->id > 0);
+ VERIFY(map->memory.pages > 0);
+
+ return 1;
+}
+
+static inline int check_bpf_map_ptr(struct bpf_map *indirect,
+ struct bpf_map *direct)
+{
+ VERIFY(indirect->map_type == direct->map_type);
+ VERIFY(indirect->key_size == direct->key_size);
+ VERIFY(indirect->value_size == direct->value_size);
+ VERIFY(indirect->max_entries == direct->max_entries);
+ VERIFY(indirect->id == direct->id);
+ VERIFY(indirect->memory.pages == direct->memory.pages);
+
+ return 1;
+}
+
+static inline int check(struct bpf_map *indirect, struct bpf_map *direct,
+ __u32 key_size, __u32 value_size, __u32 max_entries)
+{
+ VERIFY(check_bpf_map_ptr(indirect, direct));
+ VERIFY(check_bpf_map_fields(indirect, key_size, value_size,
+ max_entries));
+ return 1;
+}
+
+static inline int check_default(struct bpf_map *indirect,
+ struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+struct bpf_htab {
+ struct bpf_map map;
+ atomic_t count;
+ __u32 n_buckets;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_hash SEC(".maps");
+
+static inline int check_hash(void)
+{
+ struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ int i;
+
+ VERIFY(check_default(&hash->map, map));
+
+ VERIFY(hash->n_buckets == MAX_ENTRIES);
+ VERIFY(hash->elem_size == 64);
+
+ VERIFY(hash->count.counter == 0);
+ for (i = 0; i < HALF_ENTRIES; ++i) {
+ const __u32 key = i;
+ const __u32 val = 1;
+
+ if (bpf_map_update_elem(hash, &key, &val, 0))
+ return 0;
+ }
+ VERIFY(hash->count.counter == HALF_ENTRIES);
+
+ return 1;
+}
+
+struct bpf_array {
+ struct bpf_map map;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_array SEC(".maps");
+
+static inline int check_array(void)
+{
+ struct bpf_array *array = (struct bpf_array *)&m_array;
+ struct bpf_map *map = (struct bpf_map *)&m_array;
+ int i, n_lookups = 0, n_keys = 0;
+
+ VERIFY(check_default(&array->map, map));
+
+ VERIFY(array->elem_size == 8);
+
+ for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) {
+ const __u32 key = i;
+ __u32 *val = bpf_map_lookup_elem(array, &key);
+
+ ++n_lookups;
+ if (val)
+ ++n_keys;
+ }
+
+ VERIFY(n_lookups == MAX_ENTRIES);
+ VERIFY(n_keys == MAX_ENTRIES);
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_prog_array SEC(".maps");
+
+static inline int check_prog_array(void)
+{
+ struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array;
+ struct bpf_map *map = (struct bpf_map *)&m_prog_array;
+
+ VERIFY(check_default(&prog_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_perf_event_array SEC(".maps");
+
+static inline int check_perf_event_array(void)
+{
+ struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array;
+ struct bpf_map *map = (struct bpf_map *)&m_perf_event_array;
+
+ VERIFY(check_default(&perf_event_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_hash SEC(".maps");
+
+static inline int check_percpu_hash(void)
+{
+ struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_hash;
+
+ VERIFY(check_default(&percpu_hash->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_array SEC(".maps");
+
+static inline int check_percpu_array(void)
+{
+ struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_array;
+
+ VERIFY(check_default(&percpu_array->map, map));
+
+ return 1;
+}
+
+struct bpf_stack_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u64);
+} m_stack_trace SEC(".maps");
+
+static inline int check_stack_trace(void)
+{
+ struct bpf_stack_map *stack_trace =
+ (struct bpf_stack_map *)&m_stack_trace;
+ struct bpf_map *map = (struct bpf_map *)&m_stack_trace;
+
+ VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cgroup_array SEC(".maps");
+
+static inline int check_cgroup_array(void)
+{
+ struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_array;
+
+ VERIFY(check_default(&cgroup_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_hash SEC(".maps");
+
+static inline int check_lru_hash(void)
+{
+ struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_hash;
+
+ VERIFY(check_default(&lru_hash->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_percpu_hash SEC(".maps");
+
+static inline int check_lru_percpu_hash(void)
+{
+ struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash;
+
+ VERIFY(check_default(&lru_percpu_hash->map, map));
+
+ return 1;
+}
+
+struct lpm_trie {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct lpm_key {
+ struct bpf_lpm_trie_key trie_key;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, struct lpm_key);
+ __type(value, __u32);
+} m_lpm_trie SEC(".maps");
+
+static inline int check_lpm_trie(void)
+{
+ struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie;
+ struct bpf_map *map = (struct bpf_map *)&m_lpm_trie;
+
+ VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ });
+} m_array_of_maps SEC(".maps") = {
+ .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+static inline int check_array_of_maps(void)
+{
+ struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+
+ VERIFY(check_default(&array_of_maps->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct inner_map);
+} m_hash_of_maps SEC(".maps") = {
+ .values = {
+ [2] = &inner_map,
+ },
+};
+
+static inline int check_hash_of_maps(void)
+{
+ struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+
+ VERIFY(check_default(&hash_of_maps->map, map));
+
+ return 1;
+}
+
+struct bpf_dtab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap SEC(".maps");
+
+static inline int check_devmap(void)
+{
+ struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap;
+
+ VERIFY(check_default(&devmap->map, map));
+
+ return 1;
+}
+
+struct bpf_stab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockmap SEC(".maps");
+
+static inline int check_sockmap(void)
+{
+ struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap;
+ struct bpf_map *map = (struct bpf_map *)&m_sockmap;
+
+ VERIFY(check_default(&sockmap->map, map));
+
+ return 1;
+}
+
+struct bpf_cpu_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cpumap SEC(".maps");
+
+static inline int check_cpumap(void)
+{
+ struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap;
+ struct bpf_map *map = (struct bpf_map *)&m_cpumap;
+
+ VERIFY(check_default(&cpumap->map, map));
+
+ return 1;
+}
+
+struct xsk_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_xskmap SEC(".maps");
+
+static inline int check_xskmap(void)
+{
+ struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap;
+ struct bpf_map *map = (struct bpf_map *)&m_xskmap;
+
+ VERIFY(check_default(&xskmap->map, map));
+
+ return 1;
+}
+
+struct bpf_shtab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockhash SEC(".maps");
+
+static inline int check_sockhash(void)
+{
+ struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash;
+ struct bpf_map *map = (struct bpf_map *)&m_sockhash;
+
+ VERIFY(check_default(&sockhash->map, map));
+
+ return 1;
+}
+
+struct bpf_cgroup_storage_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_cgroup_storage SEC(".maps");
+
+static inline int check_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_map *cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage;
+
+ VERIFY(check(&cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct reuseport_array {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_reuseport_sockarray SEC(".maps");
+
+static inline int check_reuseport_sockarray(void)
+{
+ struct reuseport_array *reuseport_sockarray =
+ (struct reuseport_array *)&m_reuseport_sockarray;
+ struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray;
+
+ VERIFY(check_default(&reuseport_sockarray->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_percpu_cgroup_storage SEC(".maps");
+
+static inline int check_percpu_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_map *percpu_cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage;
+
+ VERIFY(check(&percpu_cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct bpf_queue_stack {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_queue SEC(".maps");
+
+static inline int check_queue(void)
+{
+ struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue;
+ struct bpf_map *map = (struct bpf_map *)&m_queue;
+
+ VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_stack SEC(".maps");
+
+static inline int check_stack(void)
+{
+ struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack;
+ struct bpf_map *map = (struct bpf_map *)&m_stack;
+
+ VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct bpf_sk_storage_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sk_storage SEC(".maps");
+
+static inline int check_sk_storage(void)
+{
+ struct bpf_sk_storage_map *sk_storage =
+ (struct bpf_sk_storage_map *)&m_sk_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
+
+ VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap_hash SEC(".maps");
+
+static inline int check_devmap_hash(void)
+{
+ struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap_hash;
+
+ VERIFY(check_default(&devmap_hash->map, map));
+
+ return 1;
+}
+
+struct bpf_ringbuf_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} m_ringbuf SEC(".maps");
+
+static inline int check_ringbuf(void)
+{
+ struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
+ struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+
+ VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress")
+int cg_skb(void *ctx)
+{
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap);
+ VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ check_reuseport_sockarray);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ check_percpu_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack);
+ VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf);
+
+ return 1;
+}
+
+__u32 _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c
new file mode 100644
index 000000000000..62c8cdec6d5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoload.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
+struct fake_kernel_struct {
+ int whatever;
+} __attribute__((preserve_access_index));
+
+SEC("fentry/unexisting-kprobe-will-fail-if-loaded")
+int prog3(const void *ctx)
+{
+ struct fake_kernel_struct *fake = (void *)ctx;
+ fake->whatever = 123;
+ prog3_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c
new file mode 100644
index 000000000000..ddb687c5d125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_endian.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+__u16 in16 = 0;
+__u32 in32 = 0;
+__u64 in64 = 0;
+
+__u16 out16 = 0;
+__u32 out32 = 0;
+__u64 out64 = 0;
+
+__u16 const16 = 0;
+__u32 const32 = 0;
+__u64 const64 = 0;
+
+SEC("raw_tp/sys_enter")
+int sys_enter(const void *ctx)
+{
+ out16 = __builtin_bswap16(in16);
+ out32 = __builtin_bswap32(in32);
+ out64 = __builtin_bswap64(in64);
+ const16 = ___bpf_swab16(IN16);
+ const32 = ___bpf_swab32(IN32);
+ const64 = ___bpf_swab64(IN64);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 29817a703984..b6a6eb279e54 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -57,8 +57,9 @@ struct {
SEC("raw_tracepoint/sys_enter")
int bpf_prog1(void *ctx)
{
- int max_len, max_buildid_len, usize, ksize, total_size;
+ int max_len, max_buildid_len, total_size;
struct stack_trace_t *data;
+ long usize, ksize;
void *raw_data;
__u32 key = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c
new file mode 100644
index 000000000000..6c9cbb5a3bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u64 out__bpf_link_fops = -1;
+__u64 out__bpf_link_fops1 = -1;
+__u64 out__btf_size = -1;
+__u64 out__per_cpu_start = -1;
+
+extern const void bpf_link_fops __ksym;
+extern const void __start_BTF __ksym;
+extern const void __stop_BTF __ksym;
+extern const void __per_cpu_start __ksym;
+/* non-existing symbol, weak, default to zero */
+extern const void bpf_link_fops1 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ out__bpf_link_fops = (__u64)&bpf_link_fops;
+ out__btf_size = (__u64)(&__stop_BTF - &__start_BTF);
+ out__per_cpu_start = (__u64)&__per_cpu_start;
+
+ out__bpf_link_fops1 = (__u64)&bpf_link_fops1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..cd4b72c55dfe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+long payload1_len1 = 0;
+long payload1_len2 = 0;
+long total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload3_len1 = -1;
+int payload3_len2 = -1;
+int total3= -1;
+char payload3[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload4_len1 = -1;
+int payload4_len2 = -1;
+int total4= -1;
+char payload4[MAX_LEN + MAX_LEN] = { 1 };
+
+SEC("raw_tp/sys_enter")
+int handler64_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload1;
+ u64 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload1_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload1_len2 = len;
+ }
+
+ total1 = payload - (void *)payload1;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handler64_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload3;
+ long len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len2 = len;
+ }
+ total3 = payload - (void *)payload3;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handler32_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload2;
+ u32 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len2 = len;
+ }
+
+ total2 = payload - (void *)payload2;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_exit")
+int handler32_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload4;
+ int len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len2 = len;
+ }
+ total4 = payload - (void *)payload4;
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_exit_getpid")
+int handler_exit(void *regs)
+{
+ long bla;
+
+ if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+ return 1;
+ else
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 5611b564d3b1..29fa09d6a6c6 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -63,20 +63,20 @@ int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
return 0;
}
-SEC("kprobe/hrtimer_nanosleep")
-int BPF_KPROBE(handle__kprobe,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("kprobe/hrtimer_start_range_ns")
+int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
kprobe_called = true;
return 0;
}
-SEC("fentry/hrtimer_nanosleep")
-int BPF_PROG(handle__fentry,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("fentry/hrtimer_start_range_ns")
+int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
fentry_called = true;
return 0;
}
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 54fa5fa688ce..104e833d0087 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -121,6 +121,24 @@ static void reset_affinity() {
}
}
+static void save_netns(void)
+{
+ env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (env.saved_netns_fd == -1) {
+ perror("open(/proc/self/ns/net)");
+ exit(-1);
+ }
+}
+
+static void restore_netns(void)
+{
+ if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
+ stdio_restore();
+ perror("setns(CLONE_NEWNS)");
+ exit(-1);
+ }
+}
+
void test__end_subtest()
{
struct prog_test_def *test = env.test;
@@ -138,8 +156,6 @@ void test__end_subtest()
test->test_num, test->subtest_num,
test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
- reset_affinity();
-
free(test->subtest_name);
test->subtest_name = NULL;
}
@@ -366,6 +382,8 @@ enum ARG_KEYS {
ARG_TEST_NAME_BLACKLIST = 'b',
ARG_VERIFIER_STATS = 's',
ARG_VERBOSE = 'v',
+ ARG_GET_TEST_CNT = 'c',
+ ARG_LIST_TEST_NAMES = 'l',
};
static const struct argp_option opts[] = {
@@ -379,6 +397,10 @@ static const struct argp_option opts[] = {
"Output verifier statistics", },
{ "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
"Verbose output (use -vv or -vvv for progressively verbose output)" },
+ { "count", ARG_GET_TEST_CNT, NULL, 0,
+ "Get number of selected top-level tests " },
+ { "list", ARG_LIST_TEST_NAMES, NULL, 0,
+ "List test names that would run (without running them) " },
{},
};
@@ -511,6 +533,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
}
break;
+ case ARG_GET_TEST_CNT:
+ env->get_test_cnt = true;
+ break;
+ case ARG_LIST_TEST_NAMES:
+ env->list_test_names = true;
+ break;
case ARGP_KEY_ARG:
argp_usage(state);
break;
@@ -643,6 +671,7 @@ int main(int argc, char **argv)
return -1;
}
+ save_netns();
stdio_hijack();
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
@@ -654,6 +683,17 @@ int main(int argc, char **argv)
test->test_num, test->test_name))
continue;
+ if (env.get_test_cnt) {
+ env.succ_cnt++;
+ continue;
+ }
+
+ if (env.list_test_names) {
+ fprintf(env.stdout, "%s\n", test->test_name);
+ env.succ_cnt++;
+ continue;
+ }
+
test->run_test();
/* ensure last sub-test is finalized properly */
if (test->subtest_name)
@@ -673,19 +713,34 @@ int main(int argc, char **argv)
test->error_cnt ? "FAIL" : "OK");
reset_affinity();
+ restore_netns();
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
}
stdio_restore();
+
+ if (env.get_test_cnt) {
+ printf("%d\n", env.succ_cnt);
+ goto out;
+ }
+
+ if (env.list_test_names)
+ goto out;
+
fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+out:
free_str_set(&env.test_selector.blacklist);
free_str_set(&env.test_selector.whitelist);
free(env.test_selector.num_set);
free_str_set(&env.subtest_selector.blacklist);
free_str_set(&env.subtest_selector.whitelist);
free(env.subtest_selector.num_set);
+ close(env.saved_netns_fd);
+
+ if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
+ return EXIT_FAILURE;
return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4503c926aca..6e09bf738473 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -66,6 +66,8 @@ struct test_env {
enum verbosity verbosity;
bool jit_enabled;
+ bool get_test_cnt;
+ bool list_test_names;
struct prog_test_def *test;
FILE *stdout;
@@ -78,6 +80,8 @@ struct test_env {
int sub_succ_cnt; /* successful sub-tests */
int fail_cnt; /* total failed tests + sub-tests */
int skip_cnt; /* skipped tests */
+
+ int saved_netns_fd;
};
extern struct test_env env;
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
new file mode 100644
index 000000000000..b52209db8250
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -0,0 +1,62 @@
+{
+ "bpf_map_ptr: read with negative offset rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "R1 is bpf_array invalid negative access: off=-8",
+},
+{
+ "bpf_map_ptr: write rejected",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "only read from bpf_array is supported",
+},
+{
+ "bpf_map_ptr: read non-existent field rejected",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = REJECT,
+ .errstr = "cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4",
+},
+{
+ "bpf_map_ptr: read ops field accepted",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 1 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "bpf_array access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN",
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
index cd26ee6b7b1d..1f2b8c4cb26d 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
@@ -56,7 +56,7 @@
.fixup_map_in_map = { 16 },
.fixup_map_array_48b = { 13 },
.result = REJECT,
- .errstr = "R0 invalid mem access 'map_ptr'",
+ .errstr = "only read from bpf_array is supported",
},
{
"cond: two branches returning different map pointers for lookup (tail, tail)",
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index 97ee658e1242..ed4e76b24649 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -836,3 +836,41 @@
.errstr = "R0 invalid mem access 'inv'",
.errstr_unpriv = "R0 pointer -= pointer prohibited",
},
+{
+ "32bit pkt_ptr -= scalar",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+ BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_7),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_6, BPF_REG_4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "32bit scalar -= pkt_ptr",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 40),
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_8, 2),
+ BPF_ALU32_REG(BPF_MOV, BPF_REG_4, BPF_REG_6),
+ BPF_ALU32_REG(BPF_SUB, BPF_REG_4, BPF_REG_7),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
new file mode 100644
index 000000000000..ba1d53b9f815
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -0,0 +1,786 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+VNI_GEN=$RANDOM
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
+NSIM_NETDEV=
+HAS_ETHTOOL=
+EXIT_STATUS=0
+num_cases=0
+num_errors=0
+
+clean_up_devs=( )
+
+function err_cnt {
+ echo "ERROR:" $@
+ EXIT_STATUS=1
+ ((num_errors++))
+ ((num_cases++))
+}
+
+function pass_cnt {
+ ((num_cases++))
+}
+
+function cleanup_tuns {
+ for dev in "${clean_up_devs[@]}"; do
+ [ -e /sys/class/net/$dev ] && ip link del dev $dev
+ done
+ clean_up_devs=( )
+}
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_tuns
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function new_vxlan {
+ local dev=$1
+ local dstport=$2
+ local lower=$3
+ local ipver=$4
+ local flags=$5
+
+ local group ipfl
+
+ [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type vxlan \
+ group $group \
+ dev $lower \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function new_geneve {
+ local dev=$1
+ local dstport=$2
+ local ipver=$3
+ local flags=$4
+
+ local group ipfl
+
+ [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type geneve \
+ remote $remote \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function del_dev {
+ local dev=$1
+
+ ip link del dev $dev
+ check_tables
+}
+
+# Helpers for netdevsim port/type encoding
+function mke {
+ local port=$1
+ local type=$2
+
+ echo $((port << 16 | type))
+}
+
+function pre {
+ local val=$1
+
+ echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))"
+}
+
+function pre_ethtool {
+ local val=$1
+ local port=$((val >> 16))
+ local type=$((val & 0xffff))
+
+ case $type in
+ 1)
+ type_name="vxlan"
+ ;;
+ 2)
+ type_name="geneve"
+ ;;
+ 4)
+ type_name="vxlan-gpe"
+ ;;
+ *)
+ type_name="bit X"
+ ;;
+ esac
+
+ echo "port $port, $type_name"
+}
+
+function check_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local -n expected=$2
+ local last=$3
+
+ read -a have < $path
+
+ if [ ${#expected[@]} -ne ${#have[@]} ]; then
+ echo "check_table: BAD NUMBER OF ITEMS"
+ return 0
+ fi
+
+ for i in "${!expected[@]}"; do
+ if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then
+ pp_expected=`pre_ethtool ${expected[i]}`
+ ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null
+ if [ $? -ne 0 -a $last -ne 0 ]; then
+ err_cnt "ethtool table $1 on port $port: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ return 0
+
+ fi
+ fi
+
+ if [ ${expected[i]} != ${have[i]} ]; then
+ if [ $last -ne 0 ]; then
+ err_cnt "table $1 on port $port: $pfx - $msg"
+ echo " check_table: wrong entry $i"
+ echo " expected: `pre ${expected[i]}`"
+ echo " have: `pre ${have[i]}`"
+ return 0
+ fi
+ return 1
+ fi
+ done
+
+ pass_cnt
+ return 0
+}
+
+function check_tables {
+ # Need retries in case we have workqueue making the changes
+ local retries=10
+
+ while ! check_table 0 exp0 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+ while ! check_table 1 exp1 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+}
+
+function print_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ read -a have < $path
+
+ tree $NSIM_DEV_DFS/
+
+ echo "Port $port table $1:"
+
+ for i in "${!have[@]}"; do
+ echo " `pre ${have[i]}`"
+ done
+
+}
+
+function print_tables {
+ print_table 0
+ print_table 1
+}
+
+function get_netdev_name {
+ local -n old=$1
+
+ new=$(ls /sys/class/net)
+
+ for netdev in $new; do
+ for check in $old; do
+ [ $netdev == $check ] && break
+ done
+
+ if [ $netdev != $check ]; then
+ echo $netdev
+ break
+ fi
+ done
+}
+
+###
+### Code start
+###
+
+# Probe ethtool support
+ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y
+
+modprobe netdevsim
+
+# Basic test
+pfx="basic"
+
+for port in 0 1; do
+ old_netdevs=$(ls /sys/class/net)
+ if [ $port -eq 0 ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/new_device
+ else
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ echo 1 > $NSIM_DEV_SYS/new_port
+ fi
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+ msg="new NIC device created"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ msg="VxLAN v4 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlan0 4789 $NSIM_NETDEV
+ new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+ msg="VxLAN v4 devices go down"
+ exp0=( 0 0 0 0 )
+ ifconfig vxlan1 down
+ ifconfig vxlan0 down
+ check_tables
+
+ msg="VxLAN v6 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlanA 4789 $NSIM_NETDEV 6
+
+ for ifc in vxlan0 vxlan1; do
+ ifconfig $ifc up
+ done
+
+ new_vxlan vxlanB 4789 $NSIM_NETDEV 6
+
+ msg="another VxLAN v6 devices"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+ msg="Geneve device"
+ exp1=( `mke 6081 2` 0 0 0 )
+ new_geneve gnv0 6081
+
+ msg="NIC device goes down"
+ ifconfig $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+ msg="NIC device goes up again"
+ ifconfig $NSIM_NETDEV up
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ check_tables
+
+ cleanup_tuns
+
+ msg="tunnels destroyed"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ modprobe -r geneve
+ modprobe -r vxlan
+ modprobe -r udp_tunnel
+
+ check_tables
+done
+
+modprobe -r netdevsim
+
+# Module tests
+pfx="module tests"
+
+if modinfo netdevsim | grep udp_tunnel >/dev/null; then
+ err_cnt "netdevsim depends on udp_tunnel"
+else
+ pass_cnt
+fi
+
+modprobe netdevsim
+
+old_netdevs=$(ls /sys/class/net)
+port=0
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 0 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than out wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+modprobe -r vxlan
+modprobe -r udp_tunnel
+
+msg="remove tunnels"
+exp0=( 0 0 0 0 )
+check_tables
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than out wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+exp0=( 0 0 0 0 )
+
+modprobe -r netdevsim
+modprobe netdevsim
+
+# Overflow the table
+
+function overflow_table0 {
+ local pfx=$1
+
+ msg="create VxLANs 1/5"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="create VxLANs 2/5"
+ exp0=( `mke 10000 1` `mke 10001 1` 0 0 )
+ new_vxlan vxlan1 10001 $NSIM_NETDEV
+
+ msg="create VxLANs 3/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 )
+ new_vxlan vxlan2 10002 $NSIM_NETDEV
+
+ msg="create VxLANs 4/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` )
+ new_vxlan vxlan3 10003 $NSIM_NETDEV
+
+ msg="create VxLANs 5/5"
+ new_vxlan vxlan4 10004 $NSIM_NETDEV
+}
+
+function overflow_table1 {
+ local pfx=$1
+
+ msg="create GENEVE 1/5"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="create GENEVE 2/5"
+ exp1=( `mke 20000 2` `mke 20001 2` 0 0 )
+ new_geneve gnv1 20001
+
+ msg="create GENEVE 3/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 )
+ new_geneve gnv2 20002
+
+ msg="create GENEVE 4/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` )
+ new_geneve gnv3 20003
+
+ msg="create GENEVE 5/5"
+ new_geneve gnv4 20004
+}
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Sync all
+pfx="sync all"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Destroy full NIC
+pfx="destroy full"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ overflow_table0 "destroy NIC"
+ overflow_table1 "destroy NIC"
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# IPv4 only
+pfx="IPv4 only"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA1 10000 $NSIM_NETDEV 6
+
+ ip link set dev vxlanA0 down
+ ip link set dev vxlanA0 up
+ check_tables
+
+ msg="create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="down VxLANs v4"
+ exp0=( 0 0 0 0 )
+ ip link set dev vxlan0 down
+ check_tables
+
+ msg="up VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ip link set dev vxlan0 up
+ check_tables
+
+ msg="destroy VxLANs v4"
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+
+ msg="recreate VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ del_dev vxlanA0
+ del_dev vxlanA1
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Failures
+pfx="error injection"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+
+ msg="1 - create VxLANs v6"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="1 - create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="1 - remove VxLANs v4"
+ del_dev vxlan0
+
+ msg="1 - remove VxLANs v6"
+ exp0=( 0 0 0 0 )
+ del_dev vxlanA0
+
+ msg="2 - create GENEVE"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="2 - destroy GENEVE"
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ exp1=( `mke 20000 2` 0 0 0 )
+ del_dev gnv0
+
+ msg="2 - create second GENEVE"
+ exp1=( 0 `mke 20001 2` 0 0 )
+ new_geneve gnv0 20001
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# netdev flags
+pfx="netdev flags"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="turn off"
+ exp0=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+
+ msg="create VxLANs v4 - off"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="created off - turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# device initiated reset
+pfx="reset notification"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ ifconfig $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes down"
+ ifconfig $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes up again"
+ ifconfig $NSIM_NETDEV up
+ exp0=( `mke 10000 1` 0 0 0 )
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+modprobe -r netdevsim
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $num_cases checks"
+else
+ echo "FAILED $num_errors/$num_cases checks"
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 895ec992b2f1..9491bbaa0831 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -17,6 +17,8 @@ TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
+TEST_PROGS += rxtimestamp.sh
+TEST_PROGS += devlink_port_split.py
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
new file mode 100755
index 000000000000..58bb7e9b88ce
--- /dev/null
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -0,0 +1,277 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+ """
+ Run a command in subprocess.
+ Return: Tuple of (stdout, stderr).
+ """
+
+ p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+ stdout, stderr = p.communicate()
+ stdout, stderr = stdout.decode(), stderr.decode()
+
+ if stderr != "" and not should_fail:
+ print("Error sending command: %s" % cmd)
+ print(stdout)
+ print(stderr)
+ return stdout, stderr
+
+
+class devlink_ports(object):
+ """
+ Class that holds information on the devlink ports, required to the tests;
+ if_names: A list of interfaces in the devlink ports.
+ """
+
+ def get_if_names(dev):
+ """
+ Get a list of physical devlink ports.
+ Return: Array of tuples (bus_info/port, if_name).
+ """
+
+ arr = []
+
+ cmd = "devlink -j port show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ ports = json.loads(stdout)['port']
+
+ for port in ports:
+ if dev in port:
+ if ports[port]['flavour'] == 'physical':
+ arr.append(Port(bus_info=port, name=ports[port]['netdev']))
+
+ return arr
+
+ def __init__(self, dev):
+ self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+ """
+ Get the $port's maximum number of lanes.
+ Return: number of lanes, e.g. 1, 2, 4 and 8.
+ """
+
+ cmd = "devlink -j port show %s" % port
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ if 'lanes' in values:
+ lanes = values['lanes']
+ else:
+ lanes = 0
+ return lanes
+
+
+def get_split_ability(port):
+ """
+ Get the $port split ability.
+ Return: split ability, true or false.
+ """
+
+ cmd = "devlink -j port show %s" % port.name
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ return values['splittable']
+
+
+def split(k, port, should_fail=False):
+ """
+ Split $port into $k ports.
+ If should_fail == True, the split should fail. Otherwise, should pass.
+ Return: Array of sub ports after splitting.
+ If the $port wasn't split, the array will be empty.
+ """
+
+ cmd = "devlink port split %s count %s" % (port.bus_info, k)
+ stdout, stderr = run_command(cmd, should_fail=should_fail)
+
+ if should_fail:
+ if not test(stderr != "", "%s is unsplittable" % port.name):
+ print("split an unsplittable port %s" % port.name)
+ return create_split_group(port, k)
+ else:
+ if stderr == "":
+ return create_split_group(port, k)
+ print("didn't split a splittable port %s" % port.name)
+
+ return []
+
+
+def unsplit(port):
+ """
+ Unsplit $port.
+ """
+
+ cmd = "devlink port unsplit %s" % port
+ stdout, stderr = run_command(cmd)
+ test(stderr == "", "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+ """
+ Check if $port exists in the devlink ports.
+ Return: True is so, False otherwise.
+ """
+
+ return any(dev_port.name == port
+ for dev_port in devlink_ports.get_if_names(dev))
+
+
+def exists_and_lanes(ports, lanes, dev):
+ """
+ Check if every port in the list $ports exists in the devlink ports and has
+ $lanes number of lanes after splitting.
+ Return: True if both are True, False otherwise.
+ """
+
+ for port in ports:
+ max_lanes = get_max_lanes(port)
+ if not exists(port, dev):
+ print("port %s doesn't exist in devlink ports" % port)
+ return False
+ if max_lanes != lanes:
+ print("port %s has %d lanes, but %s were expected"
+ % (port, lanes, max_lanes))
+ return False
+ return True
+
+
+def test(cond, msg):
+ """
+ Check $cond and print a message accordingly.
+ Return: True is pass, False otherwise.
+ """
+
+ if cond:
+ print("TEST: %-60s [ OK ]" % msg)
+ else:
+ print("TEST: %-60s [FAIL]" % msg)
+
+ return cond
+
+
+def create_split_group(port, k):
+ """
+ Create the split group for $port.
+ Return: Array with $k elements, which are the split port group.
+ """
+
+ return list(port.name + "s" + str(i) for i in range(k))
+
+
+def split_unsplittable_port(port, k):
+ """
+ Test that splitting of unsplittable port fails.
+ """
+
+ # split to max
+ new_split_group = split(k, port, should_fail=True)
+
+ if new_split_group != []:
+ unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+ """
+ Test that splitting of splittable port passes correctly.
+ """
+
+ new_split_group = split(k, port)
+
+ # Once the split command ends, it takes some time to the sub ifaces'
+ # to get their names. Use udevadm to continue only when all current udev
+ # events are handled.
+ cmd = "udevadm settle"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ if new_split_group != []:
+ test(exists_and_lanes(new_split_group, lanes/k, dev),
+ "split port %s into %s" % (port.name, k))
+
+ unsplit(port.bus_info)
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='A test for port splitting.')
+ parser.add_argument('--dev',
+ help='The devlink handle of the device under test. ' +
+ 'The default is the first registered devlink ' +
+ 'handle.')
+
+ return parser
+
+
+def main(cmdline=None):
+ parser = make_parser()
+ args = parser.parse_args(cmdline)
+
+ dev = args.dev
+ if not dev:
+ cmd = "devlink -j dev show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ devs = json.loads(stdout)['dev']
+ dev = list(devs.keys())[0]
+
+ cmd = "devlink dev show %s" % dev
+ stdout, stderr = run_command(cmd)
+ if stderr != "":
+ print("devlink device %s can not be found" % dev)
+ sys.exit(1)
+
+ ports = devlink_ports(dev)
+
+ for port in ports.if_names:
+ max_lanes = get_max_lanes(port.name)
+
+ # If max lanes is 0, do not test port splitting at all
+ if max_lanes == 0:
+ continue
+
+ # If 1 lane, shouldn't be able to split
+ elif max_lanes == 1:
+ test(not get_split_ability(port),
+ "%s should not be able to split" % port.name)
+ split_unsplittable_port(port, max_lanes)
+
+ # Else, splitting should pass and all the split ports should exist.
+ else:
+ lane = max_lanes
+ test(get_split_ability(port),
+ "%s should be able to split" % port.name)
+ while lane > 1:
+ split_splittable_port(port, lane, max_lanes, dev)
+
+ lane //= 2
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index eb8e2a23bbb4..ea7a11a9f788 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -50,23 +50,6 @@ cleanup()
h1_destroy
}
-different_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr
-
- speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
- if [[ ${#speeds_arr[@]} < 2 ]]; then
- check_err 1 "cannot check different speeds. There are not enough speeds"
- fi
-
- echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
-
same_speeds_autoneg_off()
{
# Check that when each of the reported speeds is forced, the links come
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
new file mode 100755
index 000000000000..4b42dfd4efd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -0,0 +1,102 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ autoneg
+ autoneg_force_mode
+ no_cable
+"
+
+NUM_NETIFS=2
+source lib.sh
+source ethtool_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+ swp3=$NETIF_NO_CABLE
+}
+
+ethtool_extended_state_check()
+{
+ local dev=$1; shift
+ local expected_ext_state=$1; shift
+ local expected_ext_substate=${1:-""}; shift
+
+ local ext_state=$(ethtool $dev | grep "Link detected" \
+ | cut -d "(" -f2 | cut -d ")" -f1)
+ local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+ | sed -e 's/^[[:space:]]*//')
+ ext_state=$(echo $ext_state | cut -d "," -f1)
+
+ [[ $ext_state == $expected_ext_state ]]
+ check_err $? "Expected \"$expected_ext_state\", got \"$ext_state\""
+
+ [[ $ext_substate == $expected_ext_substate ]]
+ check_err $? "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+}
+
+autoneg()
+{
+ RET=0
+
+ ip link set dev $swp1 up
+
+ sleep 4
+ ethtool_extended_state_check $swp1 "Autoneg" "No partner detected"
+
+ log_test "Autoneg, No partner detected"
+
+ ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+ RET=0
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $swp1 speed $speed1 autoneg off
+ ethtool_set $swp2 speed $speed2 autoneg off
+
+ sleep 4
+ ethtool_extended_state_check $swp1 "Autoneg" \
+ "No partner detected during force mode"
+
+ ethtool_extended_state_check $swp2 "Autoneg" \
+ "No partner detected during force mode"
+
+ log_test "Autoneg, No partner detected during force mode"
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $swp1 autoneg on
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+no_cable()
+{
+ RET=0
+
+ ip link set dev $swp3 up
+
+ sleep 1
+ ethtool_extended_state_check $swp3 "No cable"
+
+ log_test "No cable"
+
+ ip link set dev $swp3 down
+}
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..9188e624dec0 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -67,3 +67,20 @@ common_speeds_get()
<(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
<(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
}
+
+different_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr
+
+ speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+ if [[ ${#speeds_arr[@]} < 2 ]]; then
+ check_err 1 "cannot check different speeds. There are not enough speeds"
+ fi
+
+ echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index e2adb533c8fc..b802c14d2950 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -14,6 +14,9 @@ NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
+# Port that does not have a cable connected.
+NETIF_NO_CABLE=eth8
+
##############################################################################
# Defines
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
new file mode 100755
index 000000000000..5f20d289ee43
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -0,0 +1,198 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the
+# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the
+# packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_udp_sport
+ test_udp_dport
+ test_tcp_sport
+ test_tcp_dport
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_l4port_one()
+{
+ local pedit_locus=$1; shift
+ local pedit_prot=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ RET=0
+
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+ log_test "$pedit_locus pedit $pedit_action"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_l4port()
+{
+ local locus=$1; shift
+ local prot=$1; shift
+ local pedit_port=$1; shift
+ local flower_port=$1; shift
+ local port
+
+ for port in 1 11111 65535; do
+ do_test_pedit_l4port_one "$locus" "$prot" \
+ "$prot $pedit_port set $port" \
+ ip "ip_proto $prot $flower_port $port" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_udp_sport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" udp sport src_port
+ do_test_pedit_l4port "dev $swp2 egress" udp sport src_port
+}
+
+test_udp_dport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" udp dport dst_port
+ do_test_pedit_l4port "dev $swp2 egress" udp dport dst_port
+}
+
+test_tcp_sport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" tcp sport src_port
+ do_test_pedit_l4port "dev $swp2 egress" tcp sport src_port
+}
+
+test_tcp_dport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" tcp dport dst_port
+ do_test_pedit_l4port "dev $swp2 egress" tcp dport dst_port
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
new file mode 100755
index 000000000000..e714bae473fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -0,0 +1,492 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends one stream of traffic from H1 through a TBF shaper, to a RED
+# within TBF shaper on $swp3. The two shapers have the same configuration, and
+# thus the resulting stream should fill all available bandwidth on the latter
+# shaper. A second stream is sent from H2 also via $swp3, and used to inject
+# additional traffic. Since all available bandwidth is taken, this traffic has
+# to go to backlog.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | TBF 10Mbps | | | |
+# +-----|--------------------+ +-----|--------------------+
+# | |
+# +-----|------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|------------------------------------------------|----------------+ |
+# | | + $swp1 + $swp2 | |
+# | | BR | |
+# | | | |
+# | | + $swp3 | |
+# | | | TBF 10Mbps / RED | |
+# | +--------------------------------|-----------------------------------+ |
+# | | |
+# +-----------------------------------|---------------------------------------+
+# |
+# +-----|--------------------+
+# | H3 | |
+# | + $h1 |
+# | 192.0.2.3/28 |
+# | |
+# +--------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ red_qevent_test
+ ecn_qevent_test
+"
+
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+BACKLOG=30000
+PKTSZ=1400
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+ mtu_set $h1 10000
+ tc qdisc replace dev $h1 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 root
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+ mtu_set $h2 10000
+}
+
+h2_destroy()
+{
+ mtu_restore $h2
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.3/28
+ mtu_set $h3 10000
+}
+
+h3_destroy()
+{
+ mtu_restore $h3
+ simple_if_fini $h3 192.0.2.3/28
+}
+
+switch_create()
+{
+ ip link add dev br up type bridge
+ ip link set dev $swp1 up master br
+ ip link set dev $swp2 up master br
+ ip link set dev $swp3 up master br
+
+ mtu_set $swp1 10000
+ mtu_set $swp2 10000
+ mtu_set $swp3 10000
+
+ tc qdisc replace dev $swp3 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+ ip link add name _drop_test up type dummy
+}
+
+switch_destroy()
+{
+ ip link del dev _drop_test
+ tc qdisc del dev $swp3 root
+
+ mtu_restore $h3
+ mtu_restore $h2
+ mtu_restore $h1
+
+ ip link set dev $swp3 down nomaster
+ ip link set dev $swp2 down nomaster
+ ip link set dev $swp1 down nomaster
+ ip link del dev br
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h3_mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.3 " from host 1"
+ ping_test $h2 192.0.2.3 " from host 2"
+}
+
+get_qdisc_backlog()
+{
+ qdisc_stats_get $swp3 11: .backlog
+}
+
+get_nmarked()
+{
+ qdisc_stats_get $swp3 11: .marked
+}
+
+get_qdisc_npackets()
+{
+ qdisc_stats_get $swp3 11: .packets
+}
+
+get_nmirrored()
+{
+ link_stats_get _drop_test tx packets
+}
+
+send_packets()
+{
+ local proto=$1; shift
+ local pkts=$1; shift
+
+ $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+ local size=$1; shift
+ local proto=$1; shift
+
+ local i=0
+
+ while :; do
+ local cur=$(get_qdisc_backlog)
+ local diff=$((size - cur))
+ local pkts=$(((diff + PKTSZ - 1) / PKTSZ))
+
+ if ((cur >= size)); then
+ echo $cur
+ return 0
+ elif ((i++ > 10)); then
+ echo $cur
+ return 1
+ fi
+
+ send_packets $proto $pkts "$@"
+ sleep 1
+ done
+}
+
+check_marking()
+{
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmarked_0=$(get_nmarked)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmarked_1=$(get_nmarked)
+
+ local nmarked_d=$((nmarked_1 - nmarked_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmarked_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+check_mirroring()
+{
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmirrored_0=$(get_nmirrored)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmirrored_1=$(get_nmirrored)
+
+ local nmirrored_d=$((nmirrored_1 - nmirrored_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmirrored_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+ecn_test_common()
+{
+ local name=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Build the below-the-limit backlog using UDP. We could use TCP just
+ # fine, but this way we get a proof that UDP is accepted when queue
+ # length is below the limit. The main stream is using TCP, and if the
+ # limit is misconfigured, we would see this traffic being ECN marked.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) udp)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "$name backlog < limit"
+
+ # Now push TCP, because non-TCP traffic would be early-dropped after the
+ # backlog crosses the limit, and we want to make sure that the backlog
+ # is above the limit.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+ log_test "$name backlog > limit"
+}
+
+do_ecn_test()
+{
+ local limit=$1; shift
+ local name=ECN
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, it should all get dropped, and backlog
+ # building should fail.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+ log_test "$name backlog > limit: UDP early-dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+ local limit=$1; shift
+ local name="ECN nodrop"
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, in nodrop mode, make sure it goes to
+ # backlog as well.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+ log_test "$name backlog > limit: UDP not dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_test()
+{
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Use ECN-capable TCP to verify there's no marking even though the queue
+ # is above limit.
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+
+ # Pushing below the queue limit should work.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog < limit"
+
+ # Pushing above should not.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_fail $? "Traffic went into backlog instead of being early-dropped"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog > limit"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_qevent_test()
+{
+ local limit=$1; shift
+ local backlog
+ local base
+ local now
+ local pct
+
+ RET=0
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t udp -q &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc, so this is the best we can do to get to the real
+ # limit.
+ build_backlog $((3 * limit / 2)) udp >/dev/null
+
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now >= base + 100))
+ check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen"
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now == base))
+ check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
+
+ log_test "RED early_dropped packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_qevent_test()
+{
+ local limit=$1; shift
+ local name=ECN
+
+ RET=0
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_mirroring "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0."
+
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_mirroring ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95."
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ log_test "ECN marked packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc replace dev $swp3 parent 1:1 handle 11: red \
+ limit 1M avpkt $PKTSZ probability 1 \
+ min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 parent 1:1
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+ do_ecn_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+ do_ecn_nodrop_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+ do_red_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_qevent_test()
+{
+ install_qdisc qevent early_drop block 10
+ do_red_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_qevent_test()
+{
+ install_qdisc ecn qevent mark block 10
+ do_ecn_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f50976ee7d44..aa254aefc2c3 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,7 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
new file mode 100755
index 000000000000..39edce4f541c
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns="ns1-$rndh"
+ksft_skip=4
+test_cnt=1
+ret=0
+pids=()
+
+flush_pids()
+{
+ # mptcp_connect in join mode will sleep a bit before completing,
+ # give it some time
+ sleep 1.1
+
+ for pid in ${pids[@]}; do
+ [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
+ done
+ pids=()
+}
+
+cleanup()
+{
+ ip netns del $ns
+ for pid in ${pids[@]}; do
+ [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
+ done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+ss -h | grep -q MPTCP
+if [ $? -ne 0 ];then
+ echo "SKIP: ss tool does not support MPTCP"
+ exit $ksft_skip
+fi
+
+__chk_nr()
+{
+ local condition="$1"
+ local expected=$2
+ local msg nr
+
+ shift 2
+ msg=$*
+ nr=$(ss -inmHMN $ns | $condition)
+
+ printf "%-50s" "$msg"
+ if [ $nr != $expected ]; then
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ else
+ echo "[ ok ]"
+ fi
+ test_cnt=$((test_cnt+1))
+}
+
+chk_msk_nr()
+{
+ __chk_nr "grep -c token:" $*
+}
+
+chk_msk_fallback_nr()
+{
+ __chk_nr "grep -c fallback" $*
+}
+
+chk_msk_remote_key_nr()
+{
+ __chk_nr "grep -c remote_key" $*
+}
+
+
+trap cleanup EXIT
+ip netns add $ns
+ip -n $ns link set dev lo up
+
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+sleep 0.1
+pids[0]=$!
+chk_msk_nr 0 "no msk on netns creation"
+
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+sleep 0.1
+pids[1]=$!
+chk_msk_nr 2 "after MPC handshake "
+chk_msk_remote_key_nr 2 "....chk remote_key"
+chk_msk_fallback_nr 0 "....chk no fallback"
+flush_pids
+
+
+echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
+pids[0]=$!
+sleep 0.1
+echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
+pids[1]=$!
+sleep 0.1
+chk_msk_fallback_nr 1 "check fallback"
+flush_pids
+
+NR_CLIENTS=100
+for I in `seq 1 $NR_CLIENTS`; do
+ echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null &
+ pids[$((I*2))]=$!
+done
+sleep 0.1
+
+for I in `seq 1 $NR_CLIENTS`; do
+ echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
+ pids[$((I*2 + 1))]=$!
+done
+sleep 1.5
+
+chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+flush_pids
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cedee5b952ba..cad6f73a5fd0 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -11,6 +11,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
+#include <signal.h>
#include <unistd.h>
#include <sys/poll.h>
@@ -36,6 +37,7 @@ extern int optind;
static int poll_timeout = 10 * 1000;
static bool listen_mode;
+static bool quit;
enum cfg_mode {
CFG_MODE_POLL,
@@ -52,11 +54,12 @@ static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
+static int cfg_wait;
static void die_usage(void)
{
fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
- "[-l] connect_address\n");
+ "[-l] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
fprintf(stderr, "\t-t num -- set poll timeout to num\n");
fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
@@ -65,9 +68,15 @@ static void die_usage(void)
fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
+ fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
+static void handle_signal(int nr)
+{
+ quit = true;
+}
+
static const char *getxinfo_strerr(int err)
{
if (err == EAI_SYSTEM)
@@ -418,8 +427,8 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
/* leave some time for late join/announce */
- if (cfg_join)
- usleep(400000);
+ if (cfg_wait)
+ usleep(cfg_wait);
close(peerfd);
return 0;
@@ -812,11 +821,12 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
+ while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
break;
case 'l':
listen_mode = true;
@@ -850,6 +860,9 @@ static void parse_opts(int argc, char **argv)
case 'R':
cfg_rcvbuf = parse_int(optarg);
break;
+ case 'w':
+ cfg_wait = atoi(optarg)*1000000;
+ break;
}
}
@@ -865,6 +878,7 @@ int main(int argc, char *argv[])
{
init_rng();
+ signal(SIGUSR1, handle_signal);
parse_opts(argc, argv);
if (tcpulp_audit)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index acf02e156d20..c0589e071f20 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -3,7 +3,7 @@
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:f:t"
ret=0
sin=""
sout=""
@@ -21,6 +21,8 @@ testmode=""
sndbuf=0
rcvbuf=0
options_log=true
+do_tcp=0
+filesize=0
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -40,9 +42,11 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-f: size of file to transfer in bytes (default random)"
echo -e "\t-S: set sndbuf value (default: use kernel default)"
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
+ echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
}
while getopts "$optstring" option;do
@@ -94,6 +98,12 @@ while getopts "$optstring" option;do
"m")
testmode="$OPTARG"
;;
+ "f")
+ filesize="$OPTARG"
+ ;;
+ "t")
+ do_tcp=$((do_tcp+1))
+ ;;
"?")
usage $0
exit 1
@@ -385,10 +395,14 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"
+ local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
- ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
- local cappid=$!
+ ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
sleep 1
fi
@@ -413,7 +427,8 @@ do_transfer()
if $capture; then
sleep 1
- kill $cappid
+ kill ${cappid_listener}
+ kill ${cappid_connector}
fi
local duration
@@ -449,20 +464,25 @@ make_file()
{
local name=$1
local who=$2
+ local SIZE=$filesize
+ local ksize
+ local rem
- local SIZE TSIZE
- SIZE=$((RANDOM % (1024 * 8)))
- TSIZE=$((SIZE * 1024))
+ if [ $SIZE -eq 0 ]; then
+ local MAXSIZE=$((1024 * 1024 * 8))
+ local MINSIZE=$((1024 * 256))
+
+ SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
+ fi
- dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ ksize=$((SIZE / 1024))
+ rem=$((SIZE - (ksize * 1024)))
- SIZE=$((RANDOM % 1024))
- SIZE=$((SIZE + 128))
- TSIZE=$((TSIZE + SIZE))
- dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+ dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
+ dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
- echo "Created $name (size $TSIZE) containing data sent by $who"
+ echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
run_tests_lo()
@@ -497,9 +517,11 @@ run_tests_lo()
return 1
fi
- # don't bother testing fallback tcp except for loopback case.
- if [ ${listener_ns} != ${connector_ns} ]; then
- return 0
+ if [ $do_tcp -eq 0 ]; then
+ # don't bother testing fallback tcp except for loopback case.
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ return 0
+ fi
fi
do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
@@ -516,6 +538,15 @@ run_tests_lo()
return 1
fi
+ if [ $do_tcp -gt 1 ] ;then
+ do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+ fi
+
return 0
}
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 422e7761254d..221fdece47d4 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -44,6 +44,7 @@ struct test_case {
struct options sockopt;
struct tstamps expected;
bool enabled;
+ bool warn_on_fail;
};
struct sof_flag {
@@ -67,44 +68,44 @@ static struct socket_type socket_types[] = {
static struct test_case test_cases[] = {
{ {}, {} },
{
- { so_timestamp: 1 },
- { tstamp: true }
+ { .so_timestamp = 1 },
+ { .tstamp = true }
},
{
- { so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamp: 1, so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestamp = 1, .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE },
{}
},
{
/* Loopback device does not support hw timestamps. */
- { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
- {}
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE },
+ .warn_on_fail = true
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE
| SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { swtstamp: true }
+ { .swtstamp = true }
},
{
- { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { tstamp: true, swtstamp: true }
+ { .tstamp = true, .swtstamp = true }
},
};
@@ -115,6 +116,9 @@ static struct option long_options[] = {
{ "tcp", no_argument, 0, 't' },
{ "udp", no_argument, 0, 'u' },
{ "ip", no_argument, 0, 'i' },
+ { "strict", no_argument, 0, 'S' },
+ { "ipv4", no_argument, 0, '4' },
+ { "ipv6", no_argument, 0, '6' },
{ NULL, 0, NULL, 0 },
};
@@ -270,37 +274,55 @@ void config_so_flags(int rcv, struct options o)
error(1, errno, "Failed to set SO_TIMESTAMPING");
}
-bool run_test_case(struct socket_type s, struct test_case t)
+bool run_test_case(struct socket_type *s, int test_num, char ip_version,
+ bool strict)
{
- int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ union {
+ struct sockaddr_in6 addr6;
+ struct sockaddr_in addr4;
+ struct sockaddr addr_un;
+ } addr;
int read_size = op_size;
- struct sockaddr_in addr;
+ int src, dst, rcv, port;
+ socklen_t addr_size;
bool failed = false;
- int src, dst, rcv;
- src = socket(AF_INET, s.type, s.protocol);
+ port = (s->type == SOCK_RAW) ? 0 : next_port++;
+ memset(&addr, 0, sizeof(addr));
+ if (ip_version == '4') {
+ addr.addr4.sin_family = AF_INET;
+ addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.addr4.sin_port = htons(port);
+ addr_size = sizeof(addr.addr4);
+ if (s->type == SOCK_RAW)
+ read_size += 20; /* for IPv4 header */
+ } else {
+ addr.addr6.sin6_family = AF_INET6;
+ addr.addr6.sin6_addr = in6addr_loopback;
+ addr.addr6.sin6_port = htons(port);
+ addr_size = sizeof(addr.addr6);
+ }
+ printf("Starting testcase %d over ipv%c...\n", test_num, ip_version);
+ src = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (src < 0)
error(1, errno, "Failed to open src socket");
- dst = socket(AF_INET, s.type, s.protocol);
+ dst = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (dst < 0)
error(1, errno, "Failed to open dst socket");
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr.sin_port = htons(port);
-
- if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (bind(dst, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to bind to port %d", port);
- if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ if (s->type == SOCK_STREAM && (listen(dst, 1) < 0))
error(1, errno, "Failed to listen");
- if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (connect(src, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to connect");
- if (s.type == SOCK_STREAM) {
+ if (s->type == SOCK_STREAM) {
rcv = accept(dst, NULL, NULL);
if (rcv < 0)
error(1, errno, "Failed to accept");
@@ -309,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t)
rcv = dst;
}
- config_so_flags(rcv, t.sockopt);
+ config_so_flags(rcv, test_cases[test_num].sockopt);
usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
do_send(src);
- if (s.type == SOCK_RAW)
- read_size += 20; /* for IP header */
- failed = do_recv(rcv, read_size, t.expected);
+ failed = do_recv(rcv, read_size, test_cases[test_num].expected);
close(rcv);
close(src);
+ if (failed) {
+ printf("FAILURE in testcase %d over ipv%c ", test_num,
+ ip_version);
+ print_test_case(&test_cases[test_num]);
+ if (!strict && test_cases[test_num].warn_on_fail)
+ failed = false;
+ }
return failed;
}
@@ -327,6 +354,9 @@ int main(int argc, char **argv)
{
bool all_protocols = true;
bool all_tests = true;
+ bool cfg_ipv4 = false;
+ bool cfg_ipv6 = false;
+ bool strict = false;
int arg_index = 0;
int failures = 0;
int s, t;
@@ -363,6 +393,15 @@ int main(int argc, char **argv)
all_protocols = false;
socket_types[0].enabled = true;
break;
+ case 'S':
+ strict = true;
+ break;
+ case '4':
+ cfg_ipv4 = true;
+ break;
+ case '6':
+ cfg_ipv6 = true;
+ break;
default:
error(1, 0, "Failed to parse parameters.");
}
@@ -376,13 +415,14 @@ int main(int argc, char **argv)
for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
if (!all_tests && !test_cases[t].enabled)
continue;
-
- printf("Starting testcase %d...\n", t);
- if (run_test_case(socket_types[s], test_cases[t])) {
- failures++;
- printf("FAILURE in test case ");
- print_test_case(&test_cases[t]);
- }
+ if (cfg_ipv4 || !cfg_ipv6)
+ if (run_test_case(&socket_types[s], t, '4',
+ strict))
+ failures++;
+ if (cfg_ipv6 || !cfg_ipv4)
+ if (run_test_case(&socket_types[s], t, '6',
+ strict))
+ failures++;
}
}
if (!failures)
diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh
new file mode 100755
index 000000000000..91631e88bf46
--- /dev/null
+++ b/tools/testing/selftests/net/rxtimestamp.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./rxtimestamp $@
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
new file mode 100755
index 000000000000..5274f4a1fba1
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -0,0 +1,390 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is designed for testing the new VRF strict_mode functionality.
+
+ret=0
+
+# identifies the "init" network namespace which is often called root network
+# namespace.
+INIT_NETNS_NAME="init"
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+ip_expand_args()
+{
+ local nsname=$1
+ local nsarg=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ nsarg="-netns ${nsname}"
+ fi
+
+ echo "${nsarg}"
+}
+
+vrf_count()
+{
+ local nsname=$1
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -o link show type vrf | wc -l
+}
+
+count_vrf_by_table_id()
+{
+ local nsname=$1
+ local tableid=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l
+}
+
+add_vrf()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null
+}
+
+add_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+add_vrf_and_check_fail()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+del_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link del ${vrfname}
+ log_test $? 0 "${nsname}: remove vrf ${vrfname}"
+}
+
+config_vrf_and_check()
+{
+ local nsname=$1
+ local addr=$2
+ local vrfname=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link set dev ${vrfname} up && \
+ ip ${nsarg} addr add ${addr} dev ${vrfname}
+ log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}"
+}
+
+read_strict_mode()
+{
+ local nsname=$1
+ local rval
+ local rc=0
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \
+ grep -E "^[0-1]$")" &> /dev/null
+ if [ $? -ne 0 ]; then
+ # set errors
+ rval=255
+ rc=1
+ fi
+
+ # on success, rval can be only 0 or 1; on error, rval is equal to 255
+ echo ${rval}
+ return ${rc}
+}
+
+read_strict_mode_compare_and_check()
+{
+ local nsname=$1
+ local expected=$2
+ local res
+
+ res="$(read_strict_mode ${nsname})"
+ log_test ${res} ${expected} "${nsname}: check strict_mode=${res}"
+}
+
+set_strict_mode()
+{
+ local nsname=$1
+ local val=$2
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null
+}
+
+enable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 1
+}
+
+disable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 0
+}
+
+disable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ disable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: disable strict_mode (=0)"
+}
+
+enable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: enable strict_mode (=1)"
+}
+
+enable_strict_mode_and_check_fail()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 1 "${nsname}: CANNOT enable strict_mode"
+}
+
+strict_mode_check_default()
+{
+ local nsname=$1
+ local strictmode
+ local vrfcnt
+
+ vrfcnt=$(vrf_count ${nsname})
+ strictmode=$(read_strict_mode ${nsname})
+ log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs"
+}
+
+setup()
+{
+ modprobe vrf
+
+ ip netns add testns
+ ip netns exec testns ip link set lo up
+}
+
+cleanup()
+{
+ ip netns del testns 2>/dev/null
+
+ ip link del vrf100 2>/dev/null
+ ip link del vrf101 2>/dev/null
+ ip link del vrf102 2>/dev/null
+
+ echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null
+}
+
+vrf_strict_mode_tests_init()
+{
+ vrf_strict_mode_check_support init
+
+ strict_mode_check_default init
+
+ add_vrf_and_check init vrf100 100
+ config_vrf_and_check init 172.16.100.1/24 vrf100
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check_fail init vrf101 100
+
+ disable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf101 100
+ config_vrf_and_check init 172.16.101.1/24 vrf101
+
+ enable_strict_mode_and_check_fail init
+
+ del_vrf_and_check init vrf101
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf102 102
+ config_vrf_and_check init 172.16.102.1/24 vrf102
+
+ # the strict_modle is enabled in the init
+}
+
+vrf_strict_mode_tests_testns()
+{
+ vrf_strict_mode_check_support testns
+
+ strict_mode_check_default testns
+
+ enable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf100 100
+ config_vrf_and_check testns 10.0.100.1/24 vrf100
+
+ add_vrf_and_check_fail testns vrf101 100
+
+ add_vrf_and_check_fail testns vrf102 100
+
+ add_vrf_and_check testns vrf200 200
+
+ disable_strict_mode_and_check testns
+
+ add_vrf_and_check testns vrf101 100
+
+ add_vrf_and_check testns vrf102 100
+
+ #the strict_mode is disabled in the testns
+}
+
+vrf_strict_mode_tests_mix()
+{
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+
+ del_vrf_and_check testns vrf101
+
+ del_vrf_and_check testns vrf102
+
+ disable_strict_mode_and_check init
+
+ enable_strict_mode_and_check testns
+
+ enable_strict_mode_and_check init
+ enable_strict_mode_and_check init
+
+ disable_strict_mode_and_check testns
+ disable_strict_mode_and_check testns
+
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check testns 0
+}
+
+vrf_strict_mode_tests()
+{
+ log_section "VRF strict_mode test on init network namespace"
+ vrf_strict_mode_tests_init
+
+ log_section "VRF strict_mode test on testns network namespace"
+ vrf_strict_mode_tests_testns
+
+ log_section "VRF strict_mode test mixing init and testns network namespaces"
+ vrf_strict_mode_tests_mix
+}
+
+vrf_strict_mode_check_support()
+{
+ local nsname=$1
+ local output
+ local rc
+
+ output="$(lsmod | grep '^vrf' | awk '{print $1}')"
+ if [ -z "${output}" ]; then
+ modinfo vrf || return $?
+ fi
+
+ # we do not care about the value of the strict_mode; we only check if
+ # the strict_mode parameter is available or not.
+ read_strict_mode ${nsname} &>/dev/null; rc=$?
+ log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available"
+
+ return ${rc}
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit 0
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit 0
+fi
+
+cleanup &> /dev/null
+
+setup
+vrf_strict_mode_tests
+cleanup
+
+print_log_test_results
+
+exit $ret