184 files changed, 13255 insertions, 1741 deletions
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3820608faf57..2d3c8c8f558a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -75,7 +75,7 @@ TEST_PROGS := test_kmod.sh \
 	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
-	with_tunnels.sh \
+	with_tunnels.sh ima_setup.sh \
 	test_xdp_vlan.sh test_bpftool.py
 
 # Compile but not part of 'make run_tests'
@@ -168,9 +168,17 @@ $(OUTPUT)/%:%.c
 	$(call msg,BINARY,,$@)
 	$(Q)$(LINK.c) $^ $(LDLIBS) -o $@
 
-$(OUTPUT)/urandom_read: urandom_read.c
+# Strip -static from the flags used for liburandom_read.so and for the targets linking against
+# it, so static builds do not fail; urandom_read then still relies on system-wide shared libraries.
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c
+	$(call msg,LIB,,$@)
+	$(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $^ $(LDLIBS) -fPIC -shared -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
 	$(call msg,BINARY,,$@)
-	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $< $(LDLIBS) -Wl,--build-id=sha1 -o $@
+	$(Q)$(CC) $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^)  \
+		  -L$(OUTPUT) -lurandom_read $(LDLIBS)			       \
+		  -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
 
 $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
 	$(call msg,MOD,,$@)
@@ -328,12 +336,8 @@ SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
 LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h 			\
-		test_subskeleton.skel.h test_subskeleton_lib.skel.h
-
-# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
-# but that's created as a side-effect of the skel.h generation.
-test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
-test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+		test_subskeleton.skel.h test_subskeleton_lib.skel.h	\
+		test_usdt.skel.h
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
 	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
@@ -346,6 +350,11 @@ test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
 linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
 linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
 linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
+test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o
 
 LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
@@ -400,6 +409,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $(TRUNNER_BPF_PROGS_DIR)/*.h			\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
 		     $(wildcard $(BPFDIR)/bpf_*.h)			\
+		     $(wildcard $(BPFDIR)/*.bpf.h)			\
 		     | $(TRUNNER_OUTPUT) $$(BPFOBJ)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS))
@@ -415,11 +425,11 @@ $(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 
 $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
-	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
-	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
-	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+	$(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@
 
 $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
@@ -491,6 +501,7 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 			 btf_helpers.c flow_dissector_load.h		\
 			 cap_helpers.c
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
+		       $(OUTPUT)/liburandom_read.so			\
 		       ima_setup.sh					\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index f973320e6dbf..f061cc20e776 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -8,7 +8,6 @@
 #include <fcntl.h>
 #include <pthread.h>
 #include <sys/sysinfo.h>
-#include <sys/resource.h>
 #include <signal.h>
 #include "bench.h"
 #include "testing_helpers.h"
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
deleted file mode 100644
index 9dac9b30f8ef..000000000000
--- a/tools/testing/selftests/bpf/bpf_rlimit.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <sys/resource.h>
-#include <stdio.h>
-
-static  __attribute__((constructor)) void bpf_rlimit_ctor(void)
-{
-	struct rlimit rlim_old, rlim_new = {
-		.rlim_cur	= RLIM_INFINITY,
-		.rlim_max	= RLIM_INFINITY,
-	};
-
-	getrlimit(RLIMIT_MEMLOCK, &rlim_old);
-	/* For the sake of running the test cases, we temporarily
-	 * set rlimit to infinity in order for kernel to focus on
-	 * errors from actual test cases and not getting noise
-	 * from hitting memlock limits. The limit is on per-process
-	 * basis and not a global one, hence destructor not really
-	 * needed here.
-	 */
-	if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
-		perror("Unable to lift memlock rlimit");
-		/* Trying out lower limit, but expect potential test
-		 * case failures from this!
-		 */
-		rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
-		rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
-		setrlimit(RLIMIT_MEMLOCK, &rlim_new);
-	}
-}
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index b1ede6f0b821..82a7c9de95f9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -16,6 +16,10 @@ BPF_PROG(name, args)
 #define SOL_TCP 6
 #endif
 
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX	16
+#endif
+
 #define tcp_jiffies32 ((__u32)bpf_jiffies64())
 
 struct sock_common {
@@ -81,6 +85,7 @@ struct tcp_sock {
 	__u32	lsndtime;
 	__u32	prior_cwnd;
 	__u64	tcp_mstamp;	/* most recent packet received/sent */
+	bool	is_mptcp;
 } __attribute__((preserve_access_index));
 
 static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
@@ -225,4 +230,12 @@ static __always_inline bool tcp_cc_eq(const char *a, const char *b)
 extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
 extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
 
+struct mptcp_sock {
+	struct inet_connection_sock	sk;
+	/* members are accessed with CO-RE relocations (preserve_access_index below) */
+	__u32		token;
+	struct sock	*first;
+	char		ca_name[TCP_CA_NAME_MAX];
+} __attribute__((preserve_access_index));
+
 #endif
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 763db63a3890..3b3edc0fc8a6 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -53,3 +53,7 @@ CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_NF_CONNTRACK=y
 CONFIG_USERFAULTFD=y
+CONFIG_FPROBE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_MPTCP=y
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 87fd1aa323a9..c8be6406777f 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,7 +11,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_rlimit.h"
 #include "flow_dissector_load.h"
 
 const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
@@ -25,9 +24,8 @@ static void load_and_attach_program(void)
 	int prog_fd, ret;
 	struct bpf_object *obj;
 
-	ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-	if (ret)
-		error(1, 0, "failed to enable libbpf strict mode: %d", ret);
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
 	ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name,
 			    cfg_map_name, NULL, &prog_fd, NULL);
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index 3a7b82bd9e94..e021cc67dc02 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -20,7 +20,6 @@
 
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
-#include "bpf_rlimit.h"
 
 #define CHECK(condition, tag, format...) ({		\
 	int __ret = !!(condition);			\
@@ -67,6 +66,9 @@ int main(int argc, char **argv)
 	if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
 		return 1;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
 	if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
 		goto cleanup_cgroup_env;
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
new file mode 100644
index 000000000000..f472d28ad11a
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+#define OUTER_MAP_ENTRIES 10
+
+/* Return the id that bpf_obj_get_info_by_fd() reports for @map_fd. */
+static __u32 get_map_id_from_fd(int map_fd)
+{
+	struct bpf_map_info info = {};
+	__u32 len = sizeof(info);
+	int err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+
+	CHECK(err < 0, "Finding map info failed", "error:%s\n",
+	      strerror(errno));
+
+	return info.id;
+}
+
+/* Create OUTER_MAP_ENTRIES inner maps of @map_type and hand their fds back
+ * through @inner_map_fds; each inner map stores its own map id at key 0
+ */
+static void create_inner_maps(enum bpf_map_type map_type,
+			      __u32 *inner_map_fds)
+{
+	int map_fd, map_index, ret;
+	__u32 map_key = 0, map_id;
+	char map_name[BPF_OBJ_NAME_LEN];
+
+	for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) {
+		/* bounded and always NUL-terminated, whatever the index width */
+		snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index);
+		map_fd = bpf_map_create(map_type, map_name, sizeof(__u32),
+					sizeof(__u32), 1, NULL);
+		CHECK(map_fd < 0,
+		      "inner bpf_map_create() failed",
+		      "map_type=(%d) map_name(%s), error:%s\n",
+		      map_type, map_name, strerror(errno));
+
+		/* keep track of the inner map fd as it is required
+		 * to add records in outer map
+		 */
+		inner_map_fds[map_index] = map_fd;
+
+		/* Add entry into this created map
+		 * eg: map1 key = 0, value = map1's map id
+		 *     map2 key = 0, value = map2's map id
+		 */
+		map_id = get_map_id_from_fd(map_fd);
+		ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0);
+		CHECK(ret != 0,
+		      "bpf_map_update_elem failed",
+		      "map_type=(%d) map_name(%s), error:%s\n",
+		      map_type, map_name, strerror(errno));
+	}
+}
+
+/* Create the "outer_map" map-in-map of @map_type with OUTER_MAP_ENTRIES
+ * entries, passing @inner_map_fd to the kernel via bpf_map_create_opts
+ */
+static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd)
+{
+	LIBBPF_OPTS(bpf_map_create_opts, attr, .inner_map_fd = inner_map_fd);
+	int fd;
+
+	fd = bpf_map_create(map_type, "outer_map", sizeof(__u32),
+			    sizeof(__u32), OUTER_MAP_ENTRIES, &attr);
+	CHECK(fd < 0,
+	      "outer bpf_map_create()",
+	      "map_type=(%d), error:%s\n", map_type, strerror(errno));
+
+	return fd;
+}
+
+/* Each fetched value is expected to be an inner map id: open the map by id,
+ * read its first key/value pair and require the value to equal that id
+ */
+static void validate_fetch_results(int outer_map_fd,
+				   __u32 *fetched_keys, __u32 *fetched_values,
+				   __u32 max_entries_fetched)
+{
+	__u32 key, value, map_id;
+	int fd, idx, err;
+
+	for (idx = 0; idx < max_entries_fetched; ++idx) {
+		map_id = fetched_values[idx];
+		fd = bpf_map_get_fd_by_id(map_id);
+		CHECK(fd < 0,
+		      "Failed to get inner map fd",
+		      "from id(%d), error=%s\n",
+		      map_id, strerror(errno));
+		err = bpf_map_get_next_key(fd, NULL, &key);
+		CHECK(err != 0, "Failed to get inner map key",
+		      "error=%s\n", strerror(errno));
+
+		err = bpf_map_lookup_elem(fd, &key, &value);
+
+		close(fd);
+
+		CHECK(err != 0,
+		      "Failed to get inner map value",
+		      "for key(%d), error=%s\n",
+		      key, strerror(errno));
+
+		/* the inner map must hold its own id */
+		CHECK(map_id != value,
+		      "Failed to validate inner map value",
+		      "fetched(%d) and lookedup(%d)!\n",
+		      map_id, value);
+	}
+}
+
+/* Fetch (and optionally delete) all outer map entries in batches, then validate them */
+static void fetch_and_validate(int outer_map_fd, struct bpf_map_batch_opts *opts,
+			       __u32 batch_size, bool delete_entries)
+{
+	__u32 *fetched_keys, *fetched_values, total_fetched = 0;
+	__u32 batch_key = 0, fetch_count, step_size;
+	int err, max_entries = OUTER_MAP_ENTRIES;
+	__u32 value_size = sizeof(__u32);
+	__u32 *in_batch;
+
+	/* Buffers sized for every entry the outer map can hold */
+	fetched_keys = calloc(max_entries, value_size);
+	fetched_values = calloc(max_entries, value_size);
+	CHECK((!fetched_keys || !fetched_values),
+	      "Memory allocation failed for fetched_keys or fetched_values",
+	      "error=%s\n", strerror(errno));
+
+	for (step_size = batch_size; step_size <= max_entries; step_size += batch_size) {
+		fetch_count = step_size;
+		/* no in_batch token until this pass has fetched something */
+		in_batch = total_fetched ? &batch_key : NULL;
+		if (delete_entries)
+			err = bpf_map_lookup_and_delete_batch(outer_map_fd,
+					in_batch, &batch_key,
+					fetched_keys + total_fetched,
+					fetched_values + total_fetched,
+					&fetch_count, opts);
+		else
+			err = bpf_map_lookup_batch(outer_map_fd,
+					in_batch, &batch_key,
+					fetched_keys + total_fetched,
+					fetched_values + total_fetched,
+					&fetch_count, opts);
+
+		if (err && errno == ENOSPC) {
+			/* Fetch again with higher batch size */
+			total_fetched = 0;
+			continue;
+		}
+
+		CHECK((err < 0 && (errno != ENOENT)),
+		      "lookup with steps failed",
+		      "error: %s\n", strerror(errno));
+
+		/* Update the total fetched number */
+		total_fetched += fetch_count;
+		if (err)
+			break;
+	}
+
+	CHECK((total_fetched != max_entries),
+	      "Unable to fetch expected entries !",
+	      "total_fetched(%d) and max_entries(%d) error: (%d):%s\n",
+	      total_fetched, max_entries, errno, strerror(errno));
+
+	/* validate the fetched entries */
+	validate_fetch_results(outer_map_fd, fetched_keys,
+			       fetched_values, total_fetched);
+	printf("batch_op(%s) is successful with batch_size(%d)\n",
+	       delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size);
+
+	free(fetched_keys);
+	free(fetched_values);
+}
+
+/* Batch-update an outer map with freshly created inner maps, then batch-fetch it back */
+static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
+				  enum bpf_map_type inner_map_type)
+{
+	__u32 *outer_map_keys, *inner_map_fds;
+	__u32 max_entries = OUTER_MAP_ENTRIES;
+	LIBBPF_OPTS(bpf_map_batch_opts, opts);
+	__u32 value_size = sizeof(__u32);
+	int batch_size[2] = {5, 10};
+	__u32 map_index, op_index;
+	int outer_map_fd, ret;
+
+	outer_map_keys = calloc(max_entries, value_size);
+	inner_map_fds = calloc(max_entries, value_size);
+	CHECK((!outer_map_keys || !inner_map_fds),
+	      "Memory allocation failed for outer_map_keys or inner_map_fds",
+	      "error=%s\n", strerror(errno));
+
+	create_inner_maps(inner_map_type, inner_map_fds);
+	outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds);
+	/* create outer map keys: descending from the last slot (array) or from 1000 */
+	for (map_index = 0; map_index < max_entries; map_index++)
+		outer_map_keys[map_index] =
+			((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+			 ? (OUTER_MAP_ENTRIES - 1) : 1000) - map_index;
+
+	/* batch operation - map_update */
+	ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
+				   inner_map_fds, &max_entries, &opts);
+	CHECK(ret != 0,
+	      "Failed to update the outer map batch ops",
+	      "error=%s\n", strerror(errno));
+
+	/* batch operation - map_lookup */
+	for (op_index = 0; op_index < 2; ++op_index)
+		fetch_and_validate(outer_map_fd, &opts, batch_size[op_index], false);
+
+	/* batch operation - map_lookup_delete */
+	if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+		fetch_and_validate(outer_map_fd, &opts, max_entries, true /*delete*/);
+
+	/* close all map fds */
+	for (map_index = 0; map_index < max_entries; map_index++)
+		close(inner_map_fds[map_index]);
+	close(outer_map_fd);
+
+	free(inner_map_fds);
+	free(outer_map_keys);
+}
+
+/* Entry point: outer BPF_MAP_TYPE_ARRAY_OF_MAPS, run once per inner map type */
+void test_map_in_map_batch_ops_array(void)
+{
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+	printf("%s:PASS with inner ARRAY map\n", __func__);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+	printf("%s:PASS with inner HASH map\n", __func__);
+}
+
+/* Entry point: outer BPF_MAP_TYPE_HASH_OF_MAPS, run once per inner map type */
+void test_map_in_map_batch_ops_hash(void)
+{
+	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+	printf("%s:PASS with inner ARRAY map\n", __func__);
+	_map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+	printf("%s:PASS with inner HASH map\n", __func__);
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 2bb1f9b3841d..59cf81ec55af 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -21,6 +21,10 @@
 #include "network_helpers.h"
 #include "test_progs.h"
 
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
 #define log_err(MSG, ...) ({						\
 			int __save = errno;				\
@@ -73,13 +77,13 @@ int settimeo(int fd, int timeout_ms)
 
 #define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
 
-static int __start_server(int type, const struct sockaddr *addr,
+static int __start_server(int type, int protocol, const struct sockaddr *addr,
 			  socklen_t addrlen, int timeout_ms, bool reuseport)
 {
 	int on = 1;
 	int fd;
 
-	fd = socket(addr->sa_family, type, 0);
+	fd = socket(addr->sa_family, type, protocol);
 	if (fd < 0) {
 		log_err("Failed to create server socket");
 		return -1;
@@ -113,8 +117,8 @@ error_close:
 	return -1;
 }
 
-int start_server(int family, int type, const char *addr_str, __u16 port,
-		 int timeout_ms)
+static int start_server_proto(int family, int type, int protocol,
+			      const char *addr_str, __u16 port, int timeout_ms)
 {
 	struct sockaddr_storage addr;
 	socklen_t addrlen;
@@ -122,10 +126,23 @@ int start_server(int family, int type, const char *addr_str, __u16 port,
 	if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
 		return -1;
 
-	return __start_server(type, (struct sockaddr *)&addr,
+	return __start_server(type, protocol, (struct sockaddr *)&addr,
 			      addrlen, timeout_ms, false);
 }
 
+/* Wrapper around start_server_proto() that passes protocol 0. */
+int start_server(int family, int type, const char *addr_str, __u16 port, int timeout_ms)
+{
+	return start_server_proto(family, type, 0, addr_str, port, timeout_ms);
+}
+
+/* Wrapper around start_server_proto() for SOCK_STREAM with IPPROTO_MPTCP. */
+int start_mptcp_server(int family, const char *addr_str, __u16 port, int timeout_ms)
+{
+	return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str,
+				  port, timeout_ms);
+}
+
 int *start_reuseport_server(int family, int type, const char *addr_str,
 			    __u16 port, int timeout_ms, unsigned int nr_listens)
 {
@@ -144,7 +161,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
 	if (!fds)
 		return NULL;
 
-	fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen,
+	fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen,
 				timeout_ms, true);
 	if (fds[0] == -1)
 		goto close_fds;
@@ -154,7 +171,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
 		goto close_fds;
 
 	for (; nr_fds < nr_listens; nr_fds++) {
-		fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr,
+		fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr,
 					     addrlen, timeout_ms, true);
 		if (fds[nr_fds] == -1)
 			goto close_fds;
@@ -247,7 +264,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 	struct sockaddr_storage addr;
 	struct sockaddr_in *addr_in;
 	socklen_t addrlen, optlen;
-	int fd, type;
+	int fd, type, protocol;
 
 	if (!opts)
 		opts = &default_opts;
@@ -258,6 +275,11 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 		return -1;
 	}
 
+	if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
+		log_err("getsockopt(SO_PROTOCOL)");
+		return -1;
+	}
+
 	addrlen = sizeof(addr);
 	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
 		log_err("Failed to get server addr");
@@ -265,7 +287,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 	}
 
 	addr_in = (struct sockaddr_in *)&addr;
-	fd = socket(addr_in->sin_family, type, 0);
+	fd = socket(addr_in->sin_family, type, protocol);
 	if (fd < 0) {
 		log_err("Failed to create client socket");
 		return -1;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index a4b3b2f9877b..f882c691b790 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -42,6 +42,8 @@ extern struct ipv6_packet pkt_v6;
 int settimeo(int fd, int timeout_ms);
 int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
+int start_mptcp_server(int family, const char *addr, __u16 port,
+		       int timeout_ms);
 int *start_reuseport_server(int family, int type, const char *addr_str,
 			    __u16 port, int timeout_ms,
 			    unsigned int nr_listens);
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
new file mode 100644
index 000000000000..b17bfa0e0aac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+/* Helpers to reset / release a test_filter_set between parse_test_list() runs. */
+static void init_test_filter_set(struct test_filter_set *set)
+{
+	set->tests = NULL;
+	set->cnt = 0;
+}
+
+static void free_test_filter_set(struct test_filter_set *set)
+{
+	int t, s;
+
+	for (t = 0; t < set->cnt; t++) {
+		free(set->tests[t].name);
+		for (s = 0; s < set->tests[t].subtest_cnt; s++)
+			free((void *)set->tests[t].subtests[s]);
+		free(set->tests[t].subtests);
+	}
+	free(set->tests);
+	init_test_filter_set(set);
+}
+
+/* Check the test_filter_set that parse_test_list() builds for several filter strings */
+static void test_parse_test_list(void)
+{
+	struct test_filter_set set;
+
+	init_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing");
+	if (!ASSERT_EQ(set.cnt, 1, "test filters count"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	free_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true), "parsing");
+	if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+	ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+	free_test_filter_set(&set);
+
+	ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie", &set, true),
+		  "parsing");
+	if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+		goto error;
+	ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+		  "subtest name");
+	ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+	free_test_filter_set(&set);
+
+	/* repeated calls on the same set are expected to accumulate filters */
+	ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true), "parsing");
+	ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing");
+	ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing");
+	if (!ASSERT_EQ(set.cnt, 3, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+		goto error;
+	ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+	ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+		  "subtest name");
+	ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+	ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name");
+	free_test_filter_set(&set);
+
+	/* with the last argument false, names are expected wrapped in '*' */
+	ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing");
+	if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+		goto error;
+	if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+		goto error;
+	if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+		goto error;
+	ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
+	ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+error:
+	free_test_filter_set(&set);
+}
+
+/* Entry point; runs the parser checks as subtest "test_parse_test_list" */
+void test_arg_parsing(void)
+{
+	if (test__start_subtest("test_parse_test_list"))
+		test_parse_test_list();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index d48f6e533e1e..08c0601b3e84 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -11,15 +11,22 @@ static void trigger_func(void)
 	asm volatile ("");
 }
 
+/* attach point for the uprobe looked up by name (func_name "trigger_func2") */
+static void trigger_func2(void)
+{
+	asm volatile ("");
+}
+
 void test_attach_probe(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
-	int duration = 0;
 	struct bpf_link *kprobe_link, *kretprobe_link;
 	struct bpf_link *uprobe_link, *uretprobe_link;
 	struct test_attach_probe* skel;
 	ssize_t uprobe_offset, ref_ctr_offset;
+	struct bpf_link *uprobe_err_link;
 	bool legacy;
+	char *mem;
 
 	/* Check if new-style kprobe/uprobe API is supported.
 	 * Kernels that support new FD-based kprobe and uprobe BPF attachment
@@ -43,11 +50,12 @@ void test_attach_probe(void)
 		return;
 
 	skel = test_attach_probe__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		return;
-	if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
+	if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
 		goto cleanup;
 
+	/* manual-attach kprobe/kretprobe */
 	kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
 						 false /* retprobe */,
 						 SYS_NANOSLEEP_KPROBE_NAME);
@@ -62,6 +70,13 @@ void test_attach_probe(void)
 		goto cleanup;
 	skel->links.handle_kretprobe = kretprobe_link;
 
+	/* auto-attachable kprobe and kretprobe */
+	skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
+	ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
+
+	skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
+	ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
+
 	if (!legacy)
 		ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
 
@@ -90,25 +105,75 @@ void test_attach_probe(void)
 		goto cleanup;
 	skel->links.handle_uretprobe = uretprobe_link;
 
-	/* trigger & validate kprobe && kretprobe */
-	usleep(1);
+	/* verify auto-attach fails for old-style uprobe definition */
+	uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+	if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+		       "auto-attach should fail for old-style name"))
+		goto cleanup;
+
+	uprobe_opts.func_name = "trigger_func2";
+	uprobe_opts.retprobe = false;
+	uprobe_opts.ref_ctr_offset = 0;
+	skel->links.handle_uprobe_byname =
+			bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname,
+							0 /* this pid */,
+							"/proc/self/exe",
+							0, &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
+		goto cleanup;
 
-	if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
-		  "wrong kprobe res: %d\n", skel->bss->kprobe_res))
+	/* verify auto-attach works */
+	skel->links.handle_uretprobe_byname =
+			bpf_program__attach(skel->progs.handle_uretprobe_byname);
+	if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
 		goto cleanup;
-	if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
-		  "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
+
+	/* test attach by name for a library function, using the library
+	 * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
+	 */
+	uprobe_opts.func_name = "malloc";
+	uprobe_opts.retprobe = false;
+	skel->links.handle_uprobe_byname2 =
+			bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
+							0 /* this pid */,
+							"libc.so.6",
+							0, &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
+		goto cleanup;
+
+	uprobe_opts.func_name = "free";
+	uprobe_opts.retprobe = true;
+	skel->links.handle_uretprobe_byname2 =
+			bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
+							-1 /* any pid */,
+							"libc.so.6",
+							0, &uprobe_opts);
+	if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
 		goto cleanup;
 
+	/* trigger & validate kprobe && kretprobe */
+	usleep(1);
+
+	/* trigger & validate shared library u[ret]probes attached by name */
+	mem = malloc(1);
+	free(mem);
+
 	/* trigger & validate uprobe & uretprobe */
 	trigger_func();
 
-	if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
-		  "wrong uprobe res: %d\n", skel->bss->uprobe_res))
-		goto cleanup;
-	if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
-		  "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
-		goto cleanup;
+	/* trigger & validate uprobe attached by name */
+	trigger_func2();
+
+	ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+	ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
+	ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+	ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
+	ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+	ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+	ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+	ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+	ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
 
 cleanup:
 	test_attach_probe__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 923a6139b2d8..83ef55e3caa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -4,8 +4,11 @@
 #include <pthread.h>
 #include <sched.h>
 #include <sys/syscall.h>
+#include <sys/mman.h>
 #include <unistd.h>
 #include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
 #include "test_bpf_cookie.skel.h"
 #include "kprobe_multi.skel.h"
 
@@ -410,6 +413,88 @@ cleanup:
 	bpf_link__destroy(link);
 }
 
+/* Attach fentry, fexit and fmod_ret programs with per-link cookies, trigger
+ * them through test runs and check the cookie values recorded in bss.
+ */
+static void tracing_subtest(struct test_bpf_cookie *skel)
+{
+	int prog_fd;
+	int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
+	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+
+	skel->bss->fentry_res = 0;
+	skel->bss->fexit_res = 0;
+	skel->bss->fmod_ret_res = 0;
+
+	prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+	link_opts.tracing.cookie = 0x10000000000000L;
+	fentry_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, &link_opts);
+	if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.fexit_test1);
+	link_opts.tracing.cookie = 0x20000000000000L;
+	fexit_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FEXIT, &link_opts);
+	if (!ASSERT_GE(fexit_fd, 0, "fexit.link_create"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+	link_opts.tracing.cookie = 0x30000000000000L;
+	fmod_ret_fd = bpf_link_create(prog_fd, 0, BPF_MODIFY_RETURN, &link_opts);
+	if (!ASSERT_GE(fmod_ret_fd, 0, "fmod_ret.link_create"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+	ASSERT_OK(bpf_prog_test_run_opts(prog_fd, &opts), "fentry.test_run");
+
+	prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+	ASSERT_OK(bpf_prog_test_run_opts(prog_fd, &opts), "fmod_ret.test_run");
+
+	ASSERT_EQ(skel->bss->fentry_res, 0x10000000000000L, "fentry_res");
+	ASSERT_EQ(skel->bss->fexit_res, 0x20000000000000L, "fexit_res");
+	ASSERT_EQ(skel->bss->fmod_ret_res, 0x30000000000000L, "fmod_ret_res");
+
+cleanup:
+	if (fentry_fd >= 0)
+		close(fentry_fd);
+	if (fexit_fd >= 0)
+		close(fexit_fd);
+	if (fmod_ret_fd >= 0)
+		close(fmod_ret_fd);
+}
+
+int stack_mprotect(void);
+
+/* Attach test_int_hook as an LSM link with a cookie and expect it in lsm_res. */
+static void lsm_subtest(struct test_bpf_cookie *skel)
+{
+	int prog_fd;
+	int lsm_fd = -1;
+	LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+
+	skel->bss->lsm_res = 0;
+
+	prog_fd = bpf_program__fd(skel->progs.test_int_hook);
+	link_opts.tracing.cookie = 0x90000000000090L;
+	lsm_fd = bpf_link_create(prog_fd, 0, BPF_LSM_MAC, &link_opts);
+	if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create"))
+		goto cleanup;
+
+	errno = 0; /* a stale EPERM must not satisfy the check below */
+	stack_mprotect();
+	if (!ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+		goto cleanup;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->lsm_res, 0x90000000000090L, "lsm_res");
+
+cleanup:
+	if (lsm_fd >= 0)
+		close(lsm_fd);
+}
+
 void test_bpf_cookie(void)
 {
 	struct test_bpf_cookie *skel;
@@ -432,6 +517,10 @@ void test_bpf_cookie(void)
 		tp_subtest(skel);
 	if (test__start_subtest("perf_event"))
 		pe_subtest(skel);
+	if (test__start_subtest("trampoline"))
+		tracing_subtest(skel);
+	if (test__start_subtest("lsm"))
+		lsm_subtest(skel);
 
 	test_bpf_cookie__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 5142a7d130b2..7ff5fa93d056 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -26,6 +26,7 @@
 #include "bpf_iter_bpf_sk_storage_map.skel.h"
 #include "bpf_iter_test_kern5.skel.h"
 #include "bpf_iter_test_kern6.skel.h"
+#include "bpf_iter_bpf_link.skel.h"
 
 static int duration;
 
@@ -34,8 +35,7 @@ static void test_btf_id_or_null(void)
 	struct bpf_iter_test_kern3 *skel;
 
 	skel = bpf_iter_test_kern3__open_and_load();
-	if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
-		  "skeleton open_and_load unexpectedly succeeded\n")) {
+	if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern3__open_and_load")) {
 		bpf_iter_test_kern3__destroy(skel);
 		return;
 	}
@@ -52,7 +52,7 @@ static void do_dummy_read(struct bpf_program *prog)
 		return;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* not check contents, but ensure read() ends without error */
@@ -87,8 +87,7 @@ static void test_ipv6_route(void)
 	struct bpf_iter_ipv6_route *skel;
 
 	skel = bpf_iter_ipv6_route__open_and_load();
-	if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_ipv6_route);
@@ -101,8 +100,7 @@ static void test_netlink(void)
 	struct bpf_iter_netlink *skel;
 
 	skel = bpf_iter_netlink__open_and_load();
-	if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_netlink);
@@ -115,8 +113,7 @@ static void test_bpf_map(void)
 	struct bpf_iter_bpf_map *skel;
 
 	skel = bpf_iter_bpf_map__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_bpf_map);
@@ -129,8 +126,7 @@ static void test_task(void)
 	struct bpf_iter_task *skel;
 
 	skel = bpf_iter_task__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_task);
@@ -161,8 +157,7 @@ static void test_task_stack(void)
 	struct bpf_iter_task_stack *skel;
 
 	skel = bpf_iter_task_stack__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task_stack__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_task_stack);
@@ -183,24 +178,22 @@ static void test_task_file(void)
 	void *ret;
 
 	skel = bpf_iter_task_file__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_file__open_and_load"))
 		return;
 
 	skel->bss->tgid = getpid();
 
-	if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
-		  "pthread_create", "pthread_create failed\n"))
+	if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
+		  "pthread_create"))
 		goto done;
 
 	do_dummy_read(skel->progs.dump_task_file);
 
-	if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
-		  "pthread_join", "pthread_join failed\n"))
+	if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
+		  "pthread_join"))
 		goto done;
 
-	CHECK(skel->bss->count != 0, "check_count",
-	      "invalid non pthread file visit count %d\n", skel->bss->count);
+	ASSERT_EQ(skel->bss->count, 0, "check_count");
 
 done:
 	bpf_iter_task_file__destroy(skel);
@@ -224,7 +217,7 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
 		return ret;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
@@ -238,9 +231,8 @@ static int do_btf_read(struct bpf_iter_task_btf *skel)
 	if (CHECK(err < 0, "read", "read failed: %s\n", strerror(errno)))
 		goto free_link;
 
-	CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
-	      "check for btf representation of task_struct in iter data",
-	      "struct task_struct not found");
+	ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
+	      "check for btf representation of task_struct in iter data");
 free_link:
 	if (iter_fd > 0)
 		close(iter_fd);
@@ -255,8 +247,7 @@ static void test_task_btf(void)
 	int ret;
 
 	skel = bpf_iter_task_btf__open_and_load();
-	if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_btf__open_and_load"))
 		return;
 
 	bss = skel->bss;
@@ -265,12 +256,10 @@ static void test_task_btf(void)
 	if (ret)
 		goto cleanup;
 
-	if (CHECK(bss->tasks == 0, "check if iterated over tasks",
-		  "no task iteration, did BPF program run?\n"))
+	if (!ASSERT_NEQ(bss->tasks, 0, "no task iteration, did BPF program run?"))
 		goto cleanup;
 
-	CHECK(bss->seq_err != 0, "check for unexpected err",
-	      "bpf_seq_printf_btf returned %ld", bss->seq_err);
+	ASSERT_EQ(bss->seq_err, 0, "check for unexpected err");
 
 cleanup:
 	bpf_iter_task_btf__destroy(skel);
@@ -281,8 +270,7 @@ static void test_tcp4(void)
 	struct bpf_iter_tcp4 *skel;
 
 	skel = bpf_iter_tcp4__open_and_load();
-	if (CHECK(!skel, "bpf_iter_tcp4__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_tcp4);
@@ -295,8 +283,7 @@ static void test_tcp6(void)
 	struct bpf_iter_tcp6 *skel;
 
 	skel = bpf_iter_tcp6__open_and_load();
-	if (CHECK(!skel, "bpf_iter_tcp6__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_tcp6);
@@ -309,8 +296,7 @@ static void test_udp4(void)
 	struct bpf_iter_udp4 *skel;
 
 	skel = bpf_iter_udp4__open_and_load();
-	if (CHECK(!skel, "bpf_iter_udp4__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_udp4);
@@ -323,8 +309,7 @@ static void test_udp6(void)
 	struct bpf_iter_udp6 *skel;
 
 	skel = bpf_iter_udp6__open_and_load();
-	if (CHECK(!skel, "bpf_iter_udp6__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load"))
 		return;
 
 	do_dummy_read(skel->progs.dump_udp6);
@@ -349,7 +334,7 @@ static void test_unix(void)
 static int do_read_with_fd(int iter_fd, const char *expected,
 			   bool read_one_char)
 {
-	int err = -1, len, read_buf_len, start;
+	int len, read_buf_len, start;
 	char buf[16] = {};
 
 	read_buf_len = read_one_char ? 1 : 16;
@@ -363,9 +348,7 @@ static int do_read_with_fd(int iter_fd, const char *expected,
 	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
 		return -1;
 
-	err = strcmp(buf, expected);
-	if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
-		  buf, expected))
+	if (!ASSERT_STREQ(buf, expected, "read"))
 		return -1;
 
 	return 0;
@@ -378,19 +361,17 @@ static void test_anon_iter(bool read_one_char)
 	int iter_fd, err;
 
 	skel = bpf_iter_test_kern1__open_and_load();
-	if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern1__open_and_load"))
 		return;
 
 	err = bpf_iter_test_kern1__attach(skel);
-	if (CHECK(err, "bpf_iter_test_kern1__attach",
-		  "skeleton attach failed\n")) {
+	if (!ASSERT_OK(err, "bpf_iter_test_kern1__attach")) {
 		goto out;
 	}
 
 	link = skel->links.dump_task;
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto out;
 
 	do_read_with_fd(iter_fd, "abcd", read_one_char);
@@ -423,8 +404,7 @@ static void test_file_iter(void)
 	int err;
 
 	skel1 = bpf_iter_test_kern1__open_and_load();
-	if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel1, "bpf_iter_test_kern1__open_and_load"))
 		return;
 
 	link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
@@ -447,12 +427,11 @@ static void test_file_iter(void)
 	 * should change.
 	 */
 	skel2 = bpf_iter_test_kern2__open_and_load();
-	if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel2, "bpf_iter_test_kern2__open_and_load"))
 		goto unlink_path;
 
 	err = bpf_link__update_program(link, skel2->progs.dump_task);
-	if (CHECK(err, "update_prog", "update_prog failed\n"))
+	if (!ASSERT_OK(err, "update_prog"))
 		goto destroy_skel2;
 
 	do_read(path, "ABCD");
@@ -478,8 +457,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	char *buf;
 
 	skel = bpf_iter_test_kern4__open();
-	if (CHECK(!skel, "bpf_iter_test_kern4__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern4__open"))
 		return;
 
 	/* create two maps: bpf program will only do bpf_seq_write
@@ -515,8 +493,8 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	}
 	skel->rodata->ret1 = ret1;
 
-	if (CHECK(bpf_iter_test_kern4__load(skel),
-		  "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+	if (!ASSERT_OK(bpf_iter_test_kern4__load(skel),
+		  "bpf_iter_test_kern4__load"))
 		goto free_map2;
 
 	/* setup filtering map_id in bpf program */
@@ -538,7 +516,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 		goto free_map2;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	buf = malloc(expected_read_len);
@@ -574,22 +552,16 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 			goto free_buf;
 	}
 
-	if (CHECK(total_read_len != expected_read_len, "read",
-		  "total len %u, expected len %u\n", total_read_len,
-		  expected_read_len))
+	if (!ASSERT_EQ(total_read_len, expected_read_len, "read"))
 		goto free_buf;
 
-	if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
-		  "expected 1 actual %d\n", skel->bss->map1_accessed))
+	if (!ASSERT_EQ(skel->bss->map1_accessed, 1, "map1_accessed"))
 		goto free_buf;
 
-	if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
-		  "expected 2 actual %d\n", skel->bss->map2_accessed))
+	if (!ASSERT_EQ(skel->bss->map2_accessed, 2, "map2_accessed"))
 		goto free_buf;
 
-	CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
-	      "map2_seqnum", "two different seqnum %lld %lld\n",
-	      skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+	ASSERT_EQ(skel->bss->map2_seqnum1, skel->bss->map2_seqnum2, "map2_seqnum");
 
 free_buf:
 	free(buf);
@@ -622,15 +594,13 @@ static void test_bpf_hash_map(void)
 	char buf[64];
 
 	skel = bpf_iter_bpf_hash_map__open();
-	if (CHECK(!skel, "bpf_iter_bpf_hash_map__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_hash_map__open"))
 		return;
 
 	skel->bss->in_test_mode = true;
 
 	err = bpf_iter_bpf_hash_map__load(skel);
-	if (CHECK(!skel, "bpf_iter_bpf_hash_map__load",
-		  "skeleton load failed\n"))
+	if (!ASSERT_OK(err, "bpf_iter_bpf_hash_map__load"))
 		goto out;
 
 	/* iterator with hashmap2 and hashmap3 should fail */
@@ -659,7 +629,7 @@ static void test_bpf_hash_map(void)
 		expected_val += val;
 
 		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -669,7 +639,7 @@ static void test_bpf_hash_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -679,17 +649,11 @@ static void test_bpf_hash_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->key_sum_a != expected_key_a,
-		  "key_sum_a", "got %u expected %u\n",
-		  skel->bss->key_sum_a, expected_key_a))
+	if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
 		goto close_iter;
-	if (CHECK(skel->bss->key_sum_b != expected_key_b,
-		  "key_sum_b", "got %u expected %u\n",
-		  skel->bss->key_sum_b, expected_key_b))
+	if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %llu expected %llu\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -718,16 +682,14 @@ static void test_bpf_percpu_hash_map(void)
 	void *val;
 
 	skel = bpf_iter_bpf_percpu_hash_map__open();
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__open"))
 		return;
 
 	skel->rodata->num_cpus = bpf_num_possible_cpus();
 	val = malloc(8 * bpf_num_possible_cpus());
 
 	err = bpf_iter_bpf_percpu_hash_map__load(skel);
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_hash_map__load",
-		  "skeleton load failed\n"))
+	if (!ASSERT_OK(err, "bpf_iter_bpf_percpu_hash_map__load"))
 		goto out;
 
 	/* update map values here */
@@ -745,7 +707,7 @@ static void test_bpf_percpu_hash_map(void)
 		}
 
 		err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -758,7 +720,7 @@ static void test_bpf_percpu_hash_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -768,17 +730,11 @@ static void test_bpf_percpu_hash_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->key_sum_a != expected_key_a,
-		  "key_sum_a", "got %u expected %u\n",
-		  skel->bss->key_sum_a, expected_key_a))
+	if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
 		goto close_iter;
-	if (CHECK(skel->bss->key_sum_b != expected_key_b,
-		  "key_sum_b", "got %u expected %u\n",
-		  skel->bss->key_sum_b, expected_key_b))
+	if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %u expected %u\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -803,8 +759,7 @@ static void test_bpf_array_map(void)
 	int len, start;
 
 	skel = bpf_iter_bpf_array_map__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_array_map__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
 		return;
 
 	map_fd = bpf_map__fd(skel->maps.arraymap1);
@@ -817,7 +772,7 @@ static void test_bpf_array_map(void)
 			first_val = val;
 
 		err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -830,7 +785,7 @@ static void test_bpf_array_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -850,21 +805,16 @@ static void test_bpf_array_map(void)
 		  res_first_key, res_first_val, first_val))
 		goto close_iter;
 
-	if (CHECK(skel->bss->key_sum != expected_key,
-		  "key_sum", "got %u expected %u\n",
-		  skel->bss->key_sum, expected_key))
+	if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %llu expected %llu\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 	for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
 		err = bpf_map_lookup_elem(map_fd, &i, &val);
-		if (CHECK(err, "map_lookup", "map_lookup failed\n"))
+		if (!ASSERT_OK(err, "map_lookup"))
 			goto out;
-		if (CHECK(i != val, "invalid_val",
-			  "got value %llu expected %u\n", val, i))
+		if (!ASSERT_EQ(i, val, "invalid_val"))
 			goto out;
 	}
 
@@ -889,16 +839,14 @@ static void test_bpf_percpu_array_map(void)
 	int len;
 
 	skel = bpf_iter_bpf_percpu_array_map__open();
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__open",
-		  "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__open"))
 		return;
 
 	skel->rodata->num_cpus = bpf_num_possible_cpus();
 	val = malloc(8 * bpf_num_possible_cpus());
 
 	err = bpf_iter_bpf_percpu_array_map__load(skel);
-	if (CHECK(!skel, "bpf_iter_bpf_percpu_array_map__load",
-		  "skeleton load failed\n"))
+	if (!ASSERT_OK(err, "bpf_iter_bpf_percpu_array_map__load"))
 		goto out;
 
 	/* update map values here */
@@ -912,7 +860,7 @@ static void test_bpf_percpu_array_map(void)
 		}
 
 		err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -925,7 +873,7 @@ static void test_bpf_percpu_array_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -935,13 +883,9 @@ static void test_bpf_percpu_array_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->key_sum != expected_key,
-		  "key_sum", "got %u expected %u\n",
-		  skel->bss->key_sum, expected_key))
+	if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
 		goto close_iter;
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %u expected %u\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -966,17 +910,16 @@ static void test_bpf_sk_storage_delete(void)
 	char buf[64];
 
 	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
 		return;
 
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 
 	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+	if (!ASSERT_GE(sock_fd, 0, "socket"))
 		goto out;
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
-	if (CHECK(err, "map_update", "map_update failed\n"))
+	if (!ASSERT_OK(err, "map_update"))
 		goto out;
 
 	memset(&linfo, 0, sizeof(linfo));
@@ -989,7 +932,7 @@ static void test_bpf_sk_storage_delete(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -1027,22 +970,21 @@ static void test_bpf_sk_storage_get(void)
 	int sock_fd = -1;
 
 	skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
 		return;
 
 	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
-	if (CHECK(sock_fd < 0, "socket", "errno: %d\n", errno))
+	if (!ASSERT_GE(sock_fd, 0, "socket"))
 		goto out;
 
 	err = listen(sock_fd, 1);
-	if (CHECK(err != 0, "listen", "errno: %d\n", errno))
+	if (!ASSERT_OK(err, "listen"))
 		goto close_socket;
 
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 
 	err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
-	if (CHECK(err, "bpf_map_update_elem", "map_update_failed\n"))
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
 		goto close_socket;
 
 	do_dummy_read(skel->progs.fill_socket_owner);
@@ -1078,15 +1020,14 @@ static void test_bpf_sk_storage_map(void)
 	char buf[64];
 
 	skel = bpf_iter_bpf_sk_storage_map__open_and_load();
-	if (CHECK(!skel, "bpf_iter_bpf_sk_storage_map__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
 		return;
 
 	map_fd = bpf_map__fd(skel->maps.sk_stg_map);
 	num_sockets = ARRAY_SIZE(sock_fd);
 	for (i = 0; i < num_sockets; i++) {
 		sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
-		if (CHECK(sock_fd[i] < 0, "socket", "errno: %d\n", errno))
+		if (!ASSERT_GE(sock_fd[i], 0, "socket"))
 			goto out;
 
 		val = i + 1;
@@ -1094,7 +1035,7 @@ static void test_bpf_sk_storage_map(void)
 
 		err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
 					  BPF_NOEXIST);
-		if (CHECK(err, "map_update", "map_update failed\n"))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -1107,7 +1048,7 @@ static void test_bpf_sk_storage_map(void)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
@@ -1117,14 +1058,10 @@ static void test_bpf_sk_storage_map(void)
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->ipv6_sk_count != num_sockets,
-		  "ipv6_sk_count", "got %u expected %u\n",
-		  skel->bss->ipv6_sk_count, num_sockets))
+	if (!ASSERT_EQ(skel->bss->ipv6_sk_count, num_sockets, "ipv6_sk_count"))
 		goto close_iter;
 
-	if (CHECK(skel->bss->val_sum != expected_val,
-		  "val_sum", "got %u expected %u\n",
-		  skel->bss->val_sum, expected_val))
+	if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
 		goto close_iter;
 
 close_iter:
@@ -1147,8 +1084,7 @@ static void test_rdonly_buf_out_of_bound(void)
 	struct bpf_link *link;
 
 	skel = bpf_iter_test_kern5__open_and_load();
-	if (CHECK(!skel, "bpf_iter_test_kern5__open_and_load",
-		  "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern5__open_and_load"))
 		return;
 
 	memset(&linfo, 0, sizeof(linfo));
@@ -1167,11 +1103,23 @@ static void test_buf_neg_offset(void)
 	struct bpf_iter_test_kern6 *skel;
 
 	skel = bpf_iter_test_kern6__open_and_load();
-	if (CHECK(skel, "bpf_iter_test_kern6__open_and_load",
-		  "skeleton open_and_load unexpected success\n"))
+	if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern6__open_and_load"))
 		bpf_iter_test_kern6__destroy(skel);
 }
 
+/* Load the bpf_link iterator skeleton and do a dummy read of dump_bpf_link. */
+static void test_link_iter(void)
+{
+	struct bpf_iter_bpf_link *skel = bpf_iter_bpf_link__open_and_load();
+
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load"))
+		return;
+
+	do_dummy_read(skel->progs.dump_bpf_link);
+
+	bpf_iter_bpf_link__destroy(skel);
+}
+
 #define CMP_BUFFER_SIZE 1024
 static char task_vma_output[CMP_BUFFER_SIZE];
 static char proc_maps_output[CMP_BUFFER_SIZE];
@@ -1192,8 +1140,6 @@ static void str_strip_first_line(char *str)
 	*dst = '\0';
 }
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 static void test_task_vma(void)
 {
 	int err, iter_fd = -1, proc_maps_fd = -1;
@@ -1202,13 +1148,13 @@ static void test_task_vma(void)
 	char maps_path[64];
 
 	skel = bpf_iter_task_vma__open();
-	if (CHECK(!skel, "bpf_iter_task_vma__open", "skeleton open failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vma__open"))
 		return;
 
 	skel->bss->pid = getpid();
 
 	err = bpf_iter_task_vma__load(skel);
-	if (CHECK(err, "bpf_iter_task_vma__load", "skeleton load failed\n"))
+	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
 		goto out;
 
 	skel->links.proc_maps = bpf_program__attach_iter(
@@ -1220,7 +1166,7 @@ static void test_task_vma(void)
 	}
 
 	iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto out;
 
 	/* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks
@@ -1229,10 +1175,10 @@ static void test_task_vma(void)
 	len = 0;
 	while (len < CMP_BUFFER_SIZE) {
 		err = read_fd_into_buffer(iter_fd, task_vma_output + len,
-					  min(read_size, CMP_BUFFER_SIZE - len));
+					  MIN(read_size, CMP_BUFFER_SIZE - len));
 		if (!err)
 			break;
-		if (CHECK(err < 0, "read_iter_fd", "read_iter_fd failed\n"))
+		if (!ASSERT_GE(err, 0, "read_iter_fd"))
 			goto out;
 		len += err;
 	}
@@ -1240,18 +1186,17 @@ static void test_task_vma(void)
 	/* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
 	snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
 	proc_maps_fd = open(maps_path, O_RDONLY);
-	if (CHECK(proc_maps_fd < 0, "open_proc_maps", "open_proc_maps failed\n"))
+	if (!ASSERT_GE(proc_maps_fd, 0, "open_proc_maps"))
 		goto out;
 	err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
-	if (CHECK(err < 0, "read_prog_maps_fd", "read_prog_maps_fd failed\n"))
+	if (!ASSERT_GE(err, 0, "read_prog_maps_fd"))
 		goto out;
 
 	/* strip and compare the first line of the two files */
 	str_strip_first_line(task_vma_output);
 	str_strip_first_line(proc_maps_output);
 
-	CHECK(strcmp(task_vma_output, proc_maps_output), "compare_output",
-	      "found mismatch\n");
+	ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output");
 out:
 	close(proc_maps_fd);
 	close(iter_fd);
@@ -1320,4 +1265,6 @@ void test_bpf_iter(void)
 		test_rdonly_buf_out_of_bound();
 	if (test__start_subtest("buf-neg-offset"))
 		test_buf_neg_offset();
+	if (test__start_subtest("link-iter"))
+		test_link_iter();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index d43f548c572c..a4d0cc9d3367 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -36,13 +36,13 @@ struct test_config {
 	void (*bpf_destroy)(void *);
 };
 
-enum test_state {
+enum bpf_test_state {
 	_TS_INVALID,
 	TS_MODULE_LOAD,
 	TS_MODULE_LOAD_FAIL,
 };
 
-static _Atomic enum test_state state = _TS_INVALID;
+static _Atomic enum bpf_test_state state = _TS_INVALID;
 
 static int sys_finit_module(int fd, const char *param_values, int flags)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 8f7a1cef7d87..e9a9a31b2ffe 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -10,8 +10,6 @@
 #include "bpf_tcp_nogpl.skel.h"
 #include "bpf_dctcp_release.skel.h"
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 #ifndef ENOTSUPP
 #define ENOTSUPP 524
 #endif
@@ -53,7 +51,7 @@ static void *server(void *arg)
 
 	while (bytes < total_bytes && !READ_ONCE(stop)) {
 		nr_sent = send(fd, &batch,
-			       min(total_bytes - bytes, sizeof(batch)), 0);
+			       MIN(total_bytes - bytes, sizeof(batch)), 0);
 		if (nr_sent == -1 && errno == EINTR)
 			continue;
 		if (nr_sent == -1) {
@@ -146,7 +144,7 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
 	/* recv total_bytes */
 	while (bytes < total_bytes && !READ_ONCE(stop)) {
 		nr_recv = recv(fd, &batch,
-			       min(total_bytes - bytes, sizeof(batch)), 0);
+			       MIN(total_bytes - bytes, sizeof(batch)), 0);
 		if (nr_recv == -1 && errno == EINTR)
 			continue;
 		if (nr_recv == -1)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ec823561b912..ba5bde53d418 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -8,7 +8,6 @@
 #include <linux/filter.h>
 #include <linux/unistd.h>
 #include <bpf/bpf.h>
-#include <sys/resource.h>
 #include <libelf.h>
 #include <gelf.h>
 #include <string.h>
@@ -3974,6 +3973,105 @@ static struct btf_raw_test raw_tests[] = {
 	.value_type_id = 1,
 	.max_entries = 1,
 },
+{
+	.descr = "type_tag test #2, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_CONST_ENC(3),				/* [2] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 1),			/* [3] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
+{
+	.descr = "type_tag test #3, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 1),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
+{
+	.descr = "type_tag test #4, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 1),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
+{
+	.descr = "type_tag test #5, type tag order",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(1),				/* [3] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 2),			/* [4] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "type_tag test #6, type tag order",
+	.raw_types = {
+		BTF_PTR_ENC(2),					/* [1] */
+		BTF_TYPE_TAG_ENC(NAME_TBD, 3),			/* [2] */
+		BTF_CONST_ENC(4),				/* [3] */
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [4] */
+		BTF_PTR_ENC(6),					/* [5] */
+		BTF_CONST_ENC(2),				/* [6] */
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Type tags don't precede modifiers",
+},
 
 }; /* struct btf_raw_test raw_tests[] */
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 1dfe14ff6aa4..f2ce4fd1cdae 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -167,7 +167,7 @@ void test_core_autosize(void)
 	if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
 		goto cleanup;
 
-	err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+	err = bpf_map__lookup_elem(bss_map, &zero, sizeof(zero), &out, sizeof(out), 0);
 	if (!ASSERT_OK(err, "bss_lookup"))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index f28f75aa9154..3712dfe1be59 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -277,13 +277,21 @@ static int duration = 0;
 #define SIZE_OUTPUT_DATA(type)						\
 	STRUCT_TO_CHAR_PTR(core_reloc_size_output) {			\
 		.int_sz = sizeof(((type *)0)->int_field),		\
+		.int_off = offsetof(type, int_field),			\
 		.struct_sz = sizeof(((type *)0)->struct_field),		\
+		.struct_off = offsetof(type, struct_field),		\
 		.union_sz = sizeof(((type *)0)->union_field),		\
+		.union_off = offsetof(type, union_field),		\
 		.arr_sz = sizeof(((type *)0)->arr_field),		\
-		.arr_elem_sz = sizeof(((type *)0)->arr_field[0]),	\
+		.arr_off = offsetof(type, arr_field),			\
+		.arr_elem_sz = sizeof(((type *)0)->arr_field[1]),	\
+		.arr_elem_off = offsetof(type, arr_field[1]),		\
 		.ptr_sz = 8, /* always 8-byte pointer for BPF */	\
+		.ptr_off = offsetof(type, ptr_field),			\
 		.enum_sz = sizeof(((type *)0)->enum_field),		\
+		.enum_off = offsetof(type, enum_field),			\
 		.float_sz = sizeof(((type *)0)->float_field),		\
+		.float_off = offsetof(type, float_field),		\
 	}
 
 #define SIZE_CASE(name) {						\
@@ -714,9 +722,10 @@ static const struct core_reloc_test_case test_cases[] = {
 	}),
 	BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
 
-	/* size relocation checks */
+	/* field size and offset relocation checks */
 	SIZE_CASE(size),
 	SIZE_CASE(size___diff_sz),
+	SIZE_CASE(size___diff_offs),
 	SIZE_ERR_CASE(size___err_ambiguous),
 
 	/* validate type existence and size relocations */
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
index 6acb0e94d4d7..4a2c256c8db6 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_retro.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -6,31 +6,32 @@
 
 void test_core_retro(void)
 {
-	int err, zero = 0, res, duration = 0, my_pid = getpid();
+	int err, zero = 0, res, my_pid = getpid();
 	struct test_core_retro *skel;
 
 	/* load program */
 	skel = test_core_retro__open_and_load();
-	if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "skel_load"))
 		goto out_close;
 
-	err = bpf_map_update_elem(bpf_map__fd(skel->maps.exp_tgid_map), &zero, &my_pid, 0);
-	if (CHECK(err, "map_update", "failed to set expected PID: %d\n", errno))
+	err = bpf_map__update_elem(skel->maps.exp_tgid_map, &zero, sizeof(zero),
+				   &my_pid, sizeof(my_pid), 0);
+	if (!ASSERT_OK(err, "map_update"))
 		goto out_close;
 
 	/* attach probe */
 	err = test_core_retro__attach(skel);
-	if (CHECK(err, "attach_kprobe", "err %d\n", err))
+	if (!ASSERT_OK(err, "attach_kprobe"))
 		goto out_close;
 
 	/* trigger */
 	usleep(1);
 
-	err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.results), &zero, &res);
-	if (CHECK(err, "map_lookup", "failed to lookup result: %d\n", errno))
+	err = bpf_map__lookup_elem(skel->maps.results, &zero, sizeof(zero), &res, sizeof(res), 0);
+	if (!ASSERT_OK(err, "map_lookup"))
 		goto out_close;
 
-	CHECK(res != my_pid, "pid_check", "got %d != exp %d\n", res, my_pid);
+	ASSERT_EQ(res, my_pid, "pid_check");
 
 out_close:
 	test_core_retro__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
new file mode 100644
index 000000000000..3c7aa82b98e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include "dynptr_fail.skel.h"
+#include "dynptr_success.skel.h"
+
+static size_t log_buf_sz = 1048576; /* 1 MB */
+static char obj_log_buf[1048576];
+
+static struct {
+	const char *prog_name;		/* BPF program to load; also used as the subtest name */
+	const char *expected_err_msg;	/* substring expected in the log; NULL = success case */
+} dynptr_tests[] = {
+	/* failure cases: handled by verify_fail(), which expects the load to fail */
+	{"ringbuf_missing_release1", "Unreleased reference id=1"},
+	{"ringbuf_missing_release2", "Unreleased reference id=2"},
+	{"ringbuf_missing_release_callback", "Unreleased reference id"},
+	{"use_after_invalid", "Expected an initialized dynptr as arg #3"},
+	{"ringbuf_invalid_api", "type=mem expected=alloc_mem"},
+	{"add_dynptr_to_map1", "invalid indirect read from stack"},
+	{"add_dynptr_to_map2", "invalid indirect read from stack"},
+	{"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"},
+	{"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"},
+	{"data_slice_use_after_release", "invalid mem access 'scalar'"},
+	{"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"},
+	{"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"},
+	{"invalid_helper1", "invalid indirect read from stack"},
+	{"invalid_helper2", "Expected an initialized dynptr as arg #3"},
+	{"invalid_write1", "Expected an initialized dynptr as arg #1"},
+	{"invalid_write2", "Expected an initialized dynptr as arg #3"},
+	{"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"},
+	{"invalid_write4", "arg 1 is an unacquired reference"},
+	{"invalid_read1", "invalid read from stack"},
+	{"invalid_read2", "cannot pass in dynptr at an offset"},
+	{"invalid_read3", "invalid read from stack"},
+	{"invalid_read4", "invalid read from stack"},
+	{"invalid_offset", "invalid write to stack"},
+	{"global", "type=map_value expected=fp"},
+	{"release_twice", "arg 1 is an unacquired reference"},
+	{"release_twice_callback", "arg 1 is an unacquired reference"},
+	{"dynptr_from_mem_invalid_api",
+		"Unsupported reg type fp for bpf_dynptr_from_mem data"},
+
+	/* success cases: a NULL message routes the program to verify_success() */
+	{"test_read_write", NULL},
+	{"test_data_slice", NULL},
+	{"test_ringbuf", NULL},
+};
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	struct bpf_program *prog;
+	struct dynptr_fail *skel;
+	int err;
+
+	opts.kernel_log_buf = obj_log_buf;	/* log lands in obj_log_buf, searched below */
+	opts.kernel_log_size = log_buf_sz;
+	opts.kernel_log_level = 1;
+
+	skel = dynptr_fail__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	/* NOTE(review): presumably the other programs are not autoloaded by default - confirm */
+	bpf_program__set_autoload(prog, true);
+
+	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+
+	err = dynptr_fail__load(skel);
+	if (!ASSERT_ERR(err, "unexpected load success"))	/* the load has to fail */
+		goto cleanup;
+
+	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+	}
+
+cleanup:
+	dynptr_fail__destroy(skel);
+}
+
+/* Load dynptr_success, attach prog_name, call usleep() and expect skel->bss->err == 0. */
+static void verify_success(const char *prog_name)
+{
+	struct dynptr_success *skel;
+	struct bpf_program *prog;
+	struct bpf_link *link;
+
+	skel = dynptr_success__open();
+	if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
+		return;
+
+	skel->bss->pid = getpid();
+	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+
+	/* check the load result itself: skel is already known to be valid here */
+	if (!ASSERT_OK(dynptr_success__load(skel), "dynptr_success__load"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+		goto cleanup;
+
+	usleep(1);
+
+	ASSERT_EQ(skel->bss->err, 0, "err");
+
+	bpf_link__destroy(link);
+
+cleanup:
+	dynptr_success__destroy(skel);
+}
+
+void test_dynptr(void)
+{
+	for (int i = 0; i < ARRAY_SIZE(dynptr_tests); i++) {
+		const char *name = dynptr_tests[i].prog_name;
+		const char *msg = dynptr_tests[i].expected_err_msg;
+
+		if (!test__start_subtest(name))
+			continue;
+		/* entries carrying a message must fail to load, the rest must run */
+		if (msg)
+			verify_fail(name, msg);
+		else
+			verify_success(name);
+	}
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 3ee2107bbf7a..a7e74297f15f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -5,7 +5,7 @@
 /* that's kernel internal BPF_MAX_TRAMP_PROGS define */
 #define CNT 38
 
-void test_fexit_stress(void)
+void serial_test_fexit_stress(void)
 {
 	char test_skb[128] = {};
 	int fexit_fd[CNT] = {};
@@ -53,7 +53,7 @@ void test_fexit_stress(void)
 					    &trace_opts);
 		if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
 			goto out;
-		link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
+		link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL);
 		if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
 			goto out;
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 044df13ee069..8963f8a549f2 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -4,14 +4,16 @@
 #include <network_helpers.h>
 #include "for_each_hash_map_elem.skel.h"
 #include "for_each_array_map_elem.skel.h"
+#include "for_each_map_elem_write_key.skel.h"
 
 static unsigned int duration;
 
 static void test_hash_map(void)
 {
-	int i, err, hashmap_fd, max_entries, percpu_map_fd;
+	int i, err, max_entries;
 	struct for_each_hash_map_elem *skel;
 	__u64 *percpu_valbuf = NULL;
+	size_t percpu_val_sz;
 	__u32 key, num_cpus;
 	__u64 val;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
@@ -24,26 +26,27 @@ static void test_hash_map(void)
 	if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
 		return;
 
-	hashmap_fd = bpf_map__fd(skel->maps.hashmap);
 	max_entries = bpf_map__max_entries(skel->maps.hashmap);
 	for (i = 0; i < max_entries; i++) {
 		key = i;
 		val = i + 1;
-		err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
+		err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+					   &val, sizeof(val), BPF_ANY);
 		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
 	num_cpus = bpf_num_possible_cpus();
-	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
-	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	percpu_val_sz = sizeof(__u64) * num_cpus;
+	percpu_valbuf = malloc(percpu_val_sz);
 	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
 		goto out;
 
 	key = 1;
 	for (i = 0; i < num_cpus; i++)
 		percpu_valbuf[i] = i + 1;
-	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+				   percpu_valbuf, percpu_val_sz, BPF_ANY);
 	if (!ASSERT_OK(err, "percpu_map_update"))
 		goto out;
 
@@ -57,7 +60,7 @@ static void test_hash_map(void)
 	ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
 
 	key = 1;
-	err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
+	err = bpf_map__lookup_elem(skel->maps.hashmap, &key, sizeof(key), &val, sizeof(val), 0);
 	ASSERT_ERR(err, "hashmap_lookup");
 
 	ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
@@ -74,9 +77,10 @@ out:
 static void test_array_map(void)
 {
 	__u32 key, num_cpus, max_entries;
-	int i, arraymap_fd, percpu_map_fd, err;
+	int i, err;
 	struct for_each_array_map_elem *skel;
 	__u64 *percpu_valbuf = NULL;
+	size_t percpu_val_sz;
 	__u64 val, expected_total;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
@@ -88,7 +92,6 @@ static void test_array_map(void)
 	if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
 		return;
 
-	arraymap_fd = bpf_map__fd(skel->maps.arraymap);
 	expected_total = 0;
 	max_entries = bpf_map__max_entries(skel->maps.arraymap);
 	for (i = 0; i < max_entries; i++) {
@@ -97,21 +100,23 @@ static void test_array_map(void)
 		/* skip the last iteration for expected total */
 		if (i != max_entries - 1)
 			expected_total += val;
-		err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
+		err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+					   &val, sizeof(val), BPF_ANY);
 		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
 	num_cpus = bpf_num_possible_cpus();
-	percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
-	percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+	percpu_val_sz = sizeof(__u64) * num_cpus;
+	percpu_valbuf = malloc(percpu_val_sz);
 	if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
 		goto out;
 
 	key = 0;
 	for (i = 0; i < num_cpus; i++)
 		percpu_valbuf[i] = i + 1;
-	err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+	err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+				   percpu_valbuf, percpu_val_sz, BPF_ANY);
 	if (!ASSERT_OK(err, "percpu_map_update"))
 		goto out;
 
@@ -129,10 +134,21 @@ out:
 	for_each_array_map_elem__destroy(skel);
 }
 
+static void test_write_map_key(void)
+{
+	struct for_each_map_elem_write_key *skel;
+
+	skel = for_each_map_elem_write_key__open_and_load();	/* expected to fail */
+	if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load"))
+		for_each_map_elem_write_key__destroy(skel);	/* it loaded after all: clean up */
+}
+
 void test_for_each(void)
 {
 	if (test__start_subtest("hash_map"))
 		test_hash_map();
 	if (test__start_subtest("array_map"))
 		test_array_map();
+	if (test__start_subtest("write_map_key"))
+		test_write_map_key();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
index e1de5f80c3b2..0354f9b82c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
+++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
@@ -6,11 +6,10 @@
 void test_helper_restricted(void)
 {
 	int prog_i = 0, prog_cnt;
-	int duration = 0;
 
 	do {
 		struct test_helper_restricted *test;
-		int maybeOK;
+		int err;
 
 		test = test_helper_restricted__open();
 		if (!ASSERT_OK_PTR(test, "open"))
@@ -21,12 +20,11 @@ void test_helper_restricted(void)
 		for (int j = 0; j < prog_cnt; ++j) {
 			struct bpf_program *prog = *test->skeleton->progs[j].prog;
 
-			maybeOK = bpf_program__set_autoload(prog, prog_i == j);
-			ASSERT_OK(maybeOK, "set autoload");
+			bpf_program__set_autoload(prog, true);
 		}
 
-		maybeOK = test_helper_restricted__load(test);
-		CHECK(!maybeOK, test->skeleton->progs[prog_i].name, "helper isn't restricted");
+		err = test_helper_restricted__load(test);
+		ASSERT_ERR(err, "load_should_fail");
 
 		test_helper_restricted__destroy(test);
 	} while (++prog_i < prog_cnt);
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index b9876b55fc0c..586dc52d6fb9 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -2,6 +2,9 @@
 #include <test_progs.h>
 #include "kprobe_multi.skel.h"
 #include "trace_helpers.h"
+#include "kprobe_multi_empty.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "bpf/hashmap.h"
 
 static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
 {
@@ -140,14 +143,14 @@ test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts)
 		goto cleanup;
 
 	skel->bss->pid = getpid();
-	link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						      pattern, opts);
 	if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
 		goto cleanup;
 
 	if (opts) {
 		opts->retprobe = true;
-		link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe,
+		link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual,
 							      pattern, opts);
 		if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
 			goto cleanup;
@@ -232,7 +235,7 @@ static void test_attach_api_fails(void)
 	skel->bss->pid = getpid();
 
 	/* fail_1 - pattern and opts NULL */
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     NULL, NULL);
 	if (!ASSERT_ERR_PTR(link, "fail_1"))
 		goto cleanup;
@@ -246,7 +249,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = ARRAY_SIZE(syms);
 	opts.cookies = NULL;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     NULL, &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_2"))
 		goto cleanup;
@@ -260,7 +263,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = ARRAY_SIZE(syms);
 	opts.cookies = NULL;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     "ksys_*", &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_3"))
 		goto cleanup;
@@ -274,7 +277,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = ARRAY_SIZE(syms);
 	opts.cookies = NULL;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     "ksys_*", &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_4"))
 		goto cleanup;
@@ -288,7 +291,7 @@ static void test_attach_api_fails(void)
 	opts.cnt = 0;
 	opts.cookies = cookies;
 
-	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
 						     "ksys_*", &opts);
 	if (!ASSERT_ERR_PTR(link, "fail_5"))
 		goto cleanup;
@@ -301,6 +304,146 @@ cleanup:
 	kprobe_multi__destroy(skel);
 }
 
+static inline __u64 get_time_ns(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+	return ts.tv_nsec + (__u64) ts.tv_sec * 1000000000;	/* CLOCK_MONOTONIC reading in ns */
+}
+
+static size_t symbol_hash(const void *key, void *ctx __maybe_unused)
+{
+	return str_hash(key);	/* keys are the symbol-name strings added by get_syms() */
+}
+
+static bool symbol_equal(const void *key1, const void *key2, void *ctx __maybe_unused)
+{
+	return !strcmp(key1, key2);	/* two keys match when the names are identical */
+}
+
+/*
+ * Collect unique non-module names from available_filter_functions into a
+ * malloc'ed array (*symsp, *cntp) owned by the caller; returns 0 on success.
+ */
+static int get_syms(char ***symsp, size_t *cntp)
+{
+	size_t cap = 0, cnt = 0, i;
+	char *name, **syms = NULL;
+	struct hashmap *map;
+	char buf[256];
+	FILE *f;
+	int err = 0;
+
+	/*
+	 * The available_filter_functions contains many duplicates, which are
+	 * filtered out with hashmap__add, as it won't add an existing entry.
+	 */
+	f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+	if (!f)
+		return -EINVAL;
+
+	map = hashmap__new(symbol_hash, symbol_equal, NULL);
+	if (IS_ERR(map)) {
+		err = libbpf_get_error(map);
+		goto error;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		/* skip modules */
+		if (strchr(buf, '['))
+			continue;
+		if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+			continue;
+		/*
+		 * Some functions cause 'suspicious RCU usage' when fprobe is
+		 * attached to them: filter out arch_cpu_idle and rcu_*.
+		 */
+		if (!strcmp(name, "arch_cpu_idle") || !strncmp(name, "rcu_", 4)) {
+			free(name);
+			continue;
+		}
+		err = hashmap__add(map, name, NULL);
+		if (err) {
+			free(name);
+			if (err != -EEXIST)
+				goto error;
+			err = 0;	/* a duplicate is not a failure */
+			continue;
+		}
+		err = libbpf_ensure_mem((void **) &syms, &cap,
+					sizeof(*syms), cnt + 1);
+		if (err) {
+			free(name);
+			goto error;
+		}
+		syms[cnt] = name;
+		cnt++;
+	}
+
+	*symsp = syms;
+	*cntp = cnt;
+
+error:
+	fclose(f);
+	hashmap__free(map);
+	if (err) {
+		for (i = 0; i < cnt; i++)
+			free(syms[i]);
+		free(syms);
+	}
+	return err;
+}
+
+/* Time a kprobe_multi attach to, and detach from, all symbols from get_syms(). */
+static void test_bench_attach(void)
+{
+	LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+	__u64 attach_start_ns, attach_end_ns, detach_start_ns, detach_end_ns;
+	struct kprobe_multi_empty *skel = NULL;
+	double attach_delta, detach_delta;
+	struct bpf_link *link = NULL;
+	char **syms = NULL;
+	size_t cnt, i;
+
+	if (!ASSERT_OK(get_syms(&syms, &cnt), "get_syms"))
+		return;
+
+	skel = kprobe_multi_empty__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+		goto cleanup;
+
+	opts.syms = (const char **) syms;
+	opts.cnt = cnt;
+
+	attach_start_ns = get_time_ns();
+	link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
+						     NULL, &opts);
+	attach_end_ns = get_time_ns();
+
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+		goto cleanup;
+
+	detach_start_ns = get_time_ns();
+	bpf_link__destroy(link);
+	detach_end_ns = get_time_ns();
+
+	attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+	detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+	printf("%s: found %zu functions\n", __func__, cnt);
+	printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+	printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+
+cleanup:
+	kprobe_multi_empty__destroy(skel);
+	if (syms) {
+		for (i = 0; i < cnt; i++)
+			free(syms[i]);
+		free(syms);
+	}
+}
+
 void test_kprobe_multi_test(void)
 {
 	if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
@@ -320,4 +463,6 @@ void test_kprobe_multi_test(void)
 		test_attach_api_syms();
 	if (test__start_subtest("attach_api_fails"))
 		test_attach_api_fails();
+	if (test__start_subtest("bench_attach"))
+		test_bench_attach();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index f6933b06daf8..1d7a2f1e0731 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -138,12 +138,16 @@ cleanup:
 	test_ksyms_weak_lskel__destroy(skel);
 }
 
-static void test_write_check(void)
+static void test_write_check(bool test_handler1)
 {
 	struct test_ksyms_btf_write_check *skel;
 
-	skel = test_ksyms_btf_write_check__open_and_load();
-	ASSERT_ERR_PTR(skel, "unexpected load of a prog writing to ksym memory\n");
+	skel = test_ksyms_btf_write_check__open();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open"))
+		return;
+	bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false);
+	ASSERT_ERR(test_ksyms_btf_write_check__load(skel),
+		   "unexpected load of a prog writing to ksym memory\n");
 
 	test_ksyms_btf_write_check__destroy(skel);
 }
@@ -179,6 +183,9 @@ void test_ksyms_btf(void)
 	if (test__start_subtest("weak_ksyms_lskel"))
 		test_weak_syms_lskel();
 
-	if (test__start_subtest("write_check"))
-		test_write_check();
+	if (test__start_subtest("write_check1"))
+		test_write_check(true);
+
+	if (test__start_subtest("write_check2"))
+		test_write_check(false);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
index e9916f2817ec..cad664546912 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -14,6 +14,12 @@ void test_linked_funcs(void)
 	if (!ASSERT_OK_PTR(skel, "skel_open"))
 		return;
 
+	/* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and
+	 * are set to not autoload by default
+	 */
+	bpf_program__set_autoload(skel->progs.handler1, true);
+	bpf_program__set_autoload(skel->progs.handler2, true);
+
 	skel->rodata->my_tid = syscall(SYS_gettid);
 	skel->bss->syscall_id = SYS_getpgid;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
new file mode 100644
index 000000000000..f4ffdcabf4e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_fixup.skel.h"
+
+enum trunc_type {
+	TRUNC_NONE,	/* both the libbpf message and the end of the log are expected */
+	TRUNC_PARTIAL,	/* libbpf message expected, end of the verifier log is not */
+	TRUNC_FULL,	/* neither the second message part nor the log end is expected */
+};
+
+static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type)
+{
+	char log_buf[8 * 1024];	/* a log_buf_size of 0 makes all of it available */
+	struct test_log_fixup* skel;
+	int err;
+
+	skel = test_log_fixup__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.bad_relo, true);
+	memset(log_buf, 0, sizeof(log_buf));	/* checks below must not see stale stack data */
+	bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf));
+
+	err = test_log_fixup__load(skel);
+	if (!ASSERT_ERR(err, "load_fail"))
+		goto cleanup;
+
+	ASSERT_HAS_SUBSTR(log_buf,
+			  "0: <invalid CO-RE relocation>\n"
+			  "failed to resolve CO-RE relocation <byte_sz> ",
+			  "log_buf_part1");
+
+	switch (trunc_type) {
+	case TRUNC_NONE:
+		ASSERT_HAS_SUBSTR(log_buf,
+				  "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+				  "log_buf_part2");
+		ASSERT_HAS_SUBSTR(log_buf,
+				  "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+				  "log_buf_end");
+		break;
+	case TRUNC_PARTIAL:
+		/* the complete message patched in by libbpf must be present */
+		ASSERT_HAS_SUBSTR(log_buf,
+				  "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+				  "log_buf_part2");
+		/* the final line of the BPF verifier log must be missing */
+		ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+			    "log_buf_end");
+		break;
+	case TRUNC_FULL:
+		/* the second part of the message patched in by libbpf must be missing */
+		ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"),
+			    "log_buf_part2");
+		/* the final line of the BPF verifier log must be missing */
+		ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+			    "log_buf_end");
+		break;
+	}
+
+	if (env.verbosity > VERBOSE_NONE)
+		printf("LOG:   \n=================\n%s=================\n", log_buf);
+cleanup:
+	test_log_fixup__destroy(skel);
+}
+
+/* Load must fail and the log must name the bad CO-RE relocation of bad_relo_subprog. */
+static void bad_core_relo_subprog(void)
+{
+	char log_buf[8 * 1024] = {};	/* zeroed: substring checks must not see stack garbage */
+	struct test_log_fixup *skel;
+	int err;
+
+	skel = test_log_fixup__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.bad_relo_subprog, true);
+	bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf));
+
+	err = test_log_fixup__load(skel);
+	if (!ASSERT_ERR(err, "load_fail"))
+		goto cleanup;
+
+	ASSERT_HAS_SUBSTR(log_buf,
+			  ": <invalid CO-RE relocation>\n"
+			  "failed to resolve CO-RE relocation <byte_off> ",
+			  "log_buf");
+	ASSERT_HAS_SUBSTR(log_buf,
+			  "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n",
+			  "log_buf");
+
+	if (env.verbosity > VERBOSE_NONE)
+		printf("LOG:   \n=================\n%s=================\n", log_buf);
+cleanup:
+	test_log_fixup__destroy(skel);
+}
+
+/* Load must fail and the log must name the map whose autocreate was switched off. */
+static void missing_map(void)
+{
+	char log_buf[8 * 1024] = {};	/* zeroed: substring checks must not see stack garbage */
+	struct test_log_fixup *skel;
+	int err;
+
+	skel = test_log_fixup__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bpf_map__set_autocreate(skel->maps.missing_map, false);
+
+	bpf_program__set_autoload(skel->progs.use_missing_map, true);
+	bpf_program__set_log_buf(skel->progs.use_missing_map, log_buf, sizeof(log_buf));
+
+	err = test_log_fixup__load(skel);
+	if (!ASSERT_ERR(err, "load_fail"))
+		goto cleanup;
+
+	ASSERT_TRUE(bpf_map__autocreate(skel->maps.existing_map), "existing_map_autocreate");
+	ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate");
+
+	ASSERT_HAS_SUBSTR(log_buf,
+			  "8: <invalid BPF map reference>\n"
+			  "BPF map 'missing_map' is referenced but wasn't created\n",
+			  "log_buf");
+
+	if (env.verbosity > VERBOSE_NONE)
+		printf("LOG:   \n=================\n%s=================\n", log_buf);
+cleanup:
+	test_log_fixup__destroy(skel);
+}
+
+void test_log_fixup(void)
+{
+	if (test__start_subtest("bad_core_relo_trunc_none"))
+		bad_core_relo(0, TRUNC_NONE /* 0 = use the whole log buffer */);
+	if (test__start_subtest("bad_core_relo_trunc_partial"))
+		bad_core_relo(300, TRUNC_PARTIAL /* cuts off the end of the original log */);
+	if (test__start_subtest("bad_core_relo_trunc_full"))
+		bad_core_relo(250, TRUNC_FULL  /* also cuts into libbpf's message patch */);
+	if (test__start_subtest("bad_core_relo_subprog"))
+		bad_core_relo_subprog();
+	if (test__start_subtest("missing_map"))
+		missing_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
index beebfa9730e1..a767bb4a271c 100644
--- a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -112,7 +112,8 @@ static void test_lookup_and_delete_hash(void)
 
 	/* Lookup and delete element. */
 	key = 1;
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), &value, sizeof(value), 0);
 	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
 
@@ -147,7 +148,8 @@ static void test_lookup_and_delete_percpu_hash(void)
 
 	/* Lookup and delete element. */
 	key = 1;
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), value, sizeof(value), 0);
 	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
 
@@ -191,7 +193,8 @@ static void test_lookup_and_delete_lru_hash(void)
 		goto cleanup;
 
 	/* Lookup and delete element 3. */
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, &value);
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), &value, sizeof(value), 0);
 	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
 
@@ -240,10 +243,10 @@ static void test_lookup_and_delete_lru_percpu_hash(void)
 		value[i] = 0;
 
 	/* Lookup and delete element 3. */
-	err = bpf_map_lookup_and_delete_elem(map_fd, &key, value);
-	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem")) {
+	err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+					      &key, sizeof(key), value, sizeof(value), 0);
+	if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
 		goto cleanup;
-	}
 
 	/* Check if only one CPU has set the value. */
 	for (i = 0; i < nr_cpus; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
new file mode 100644
index 000000000000..fdcea7a61491
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_kptr.skel.h"
+#include "map_kptr_fail.skel.h"
+
+/* Verifier log shared by all failure subtests; reset before every load. */
+static char log_buf[1024 * 1024];
+
+/* Programs from map_kptr_fail whose load must fail, each paired with the
+ * substring that has to show up in the verifier log.
+ */
+struct {
+	const char *prog_name;
+	const char *err_msg;
+} map_kptr_fail_tests[] = {
+	{ "size_not_bpf_dw", "kptr access size must be BPF_DW" },
+	{ "non_const_var_off", "kptr access cannot have variable offset" },
+	{ "non_const_var_off_kptr_xchg", "R1 doesn't have constant offset. kptr has to be" },
+	{ "misaligned_access_write", "kptr access misaligned expected=8 off=7" },
+	{ "misaligned_access_read", "kptr access misaligned expected=8 off=1" },
+	{ "reject_var_off_store", "variable untrusted_ptr_ access var_off=(0x0; 0x1e0)" },
+	{ "reject_bad_type_match", "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc" },
+	{ "marked_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
+	{ "correct_btf_id_check_size", "access beyond struct prog_test_ref_kfunc at off 32 size 4" },
+	{ "inherit_untrusted_on_walk", "R1 type=untrusted_ptr_ expected=percpu_ptr_" },
+	{ "reject_kptr_xchg_on_unref", "off=8 kptr isn't referenced kptr" },
+	{ "reject_kptr_get_no_map_val", "arg#0 expected pointer to map value" },
+	{ "reject_kptr_get_no_null_map_val", "arg#0 expected pointer to map value" },
+	{ "reject_kptr_get_no_kptr", "arg#0 no referenced kptr at map value offset=0" },
+	{ "reject_kptr_get_on_unref", "arg#0 no referenced kptr at map value offset=8" },
+	{ "reject_kptr_get_bad_type_match", "kernel function bpf_kfunc_call_test_kptr_get args#0" },
+	{ "mark_ref_as_untrusted_or_null", "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_" },
+	{ "reject_untrusted_store_to_ref", "store to referenced kptr disallowed" },
+	{ "reject_bad_type_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member" },
+	{ "reject_untrusted_xchg", "R2 type=untrusted_ptr_ expected=ptr_" },
+	{ "reject_member_of_ref_xchg", "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc" },
+	{ "reject_indirect_helper_access", "kptr cannot be accessed indirectly by helper" },
+	{ "reject_indirect_global_func_access", "kptr cannot be accessed indirectly by helper" },
+	{ "kptr_xchg_ref_state", "Unreleased reference id=5 alloc_insn=" },
+	{ "kptr_get_ref_state", "Unreleased reference id=3 alloc_insn=" },
+};
+
+/* Open map_kptr_fail, turn autoload on for @prog_name and check that loading
+ * fails with @err_msg present in the verifier log.
+ */
+static void test_map_kptr_fail_prog(const char *prog_name, const char *err_msg)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+						.kernel_log_size = sizeof(log_buf),
+						.kernel_log_level = 1);
+	struct map_kptr_fail *skel;
+	struct bpf_program *prog;
+	int ret;
+
+	skel = map_kptr_fail__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "map_kptr_fail__open_opts"))
+		return;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto end;
+
+	bpf_program__set_autoload(prog, true);
+
+	/* log_buf is shared between subtests: drop the previous program's log
+	 * so a stale message can never satisfy the strstr() check below.
+	 */
+	log_buf[0] = '\0';
+
+	ret = map_kptr_fail__load(skel);
+	if (!ASSERT_ERR(ret, "map_kptr__load must fail"))
+		goto end;
+
+	if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+		fprintf(stderr, "Expected: %s\n", err_msg);
+		fprintf(stderr, "Verifier: %s\n", log_buf);
+	}
+
+end:
+	map_kptr_fail__destroy(skel);
+}
+
+/* Run every entry of map_kptr_fail_tests as its own subtest. */
+static void test_map_kptr_fail(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(map_kptr_fail_tests); i++) {
+		if (!test__start_subtest(map_kptr_fail_tests[i].prog_name))
+			continue;
+		test_map_kptr_fail_prog(map_kptr_fail_tests[i].prog_name,
+					map_kptr_fail_tests[i].err_msg);
+	}
+}
+
+/* Load map_kptr and run test_map_kptr_ref and test_map_kptr_ref2 through
+ * bpf_prog_test_run_opts(). Unless @test_run is set, also update/delete key 0
+ * in array_map, hash_map, hash_malloc_map and lru_hash_map from user space.
+ * Once loaded, the skeleton is destroyed on every exit path.
+ */
+static void test_map_kptr_success(bool test_run)
+{
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.repeat = 1,
+	);
+	struct map_kptr *skel;
+	int key = 0, ret;
+	char buf[16];
+
+	skel = map_kptr__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+		return;
+
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref), &opts);
+	ASSERT_OK(ret, "test_map_kptr_ref refcount");
+	ASSERT_OK(opts.retval, "test_map_kptr_ref retval");
+	ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts);
+	ASSERT_OK(ret, "test_map_kptr_ref2 refcount");
+	ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval");
+
+	/* the test_run-only pass must still release the skeleton */
+	if (test_run)
+		goto exit;
+
+	ret = bpf_map__update_elem(skel->maps.array_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
+	ASSERT_OK(ret, "array_map update");
+	ret = bpf_map__update_elem(skel->maps.array_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
+	ASSERT_OK(ret, "array_map update2");
+
+	ret = bpf_map__update_elem(skel->maps.hash_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
+	ASSERT_OK(ret, "hash_map update");
+	ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
+	ASSERT_OK(ret, "hash_map delete");
+
+	ret = bpf_map__update_elem(skel->maps.hash_malloc_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
+	ASSERT_OK(ret, "hash_malloc_map update");
+	ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
+	ASSERT_OK(ret, "hash_malloc_map delete");
+
+	ret = bpf_map__update_elem(skel->maps.lru_hash_map,
+				   &key, sizeof(key), buf, sizeof(buf), 0);
+	ASSERT_OK(ret, "lru_hash_map update");
+	ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
+	ASSERT_OK(ret, "lru_hash_map delete");
+
+exit:
+	map_kptr__destroy(skel);
+}
+
+/* Entry point: the "success" subtest plus one subtest per rejected program. */
+void test_map_kptr(void)
+{
+	if (test__start_subtest("success")) {
+		test_map_kptr_success(false);
+		/* Do test_run twice, so that we see refcount going back to 1
+		 * after we leave it in map from first iteration.
+		 */
+		test_map_kptr_success(true);
+	}
+	test_map_kptr_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..bfb1bf3fd427
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <test_progs.h>
+#include "test_map_lookup_percpu_elem.skel.h"
+
+/* Store the values 0..nr_cpus-1 under key 0 of each per-CPU map, issue a
+ * getuid() syscall (presumably what triggers the attached program) and check
+ * that every sum reported in .bss equals 0 + 1 + ... + (nr_cpus - 1).
+ */
+void test_map_lookup_percpu_elem(void)
+{
+	struct test_map_lookup_percpu_elem *skel;
+	__u64 key = 0, sum;
+	int ret, i, nr_cpus = libbpf_num_possible_cpus();
+	__u64 *buf;
+
+	/* libbpf_num_possible_cpus() reports failure as a negative errno; do
+	 * not let that leak into the allocation size below.
+	 */
+	if (!ASSERT_GT(nr_cpus, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	buf = malloc(nr_cpus*sizeof(__u64));
+	if (!ASSERT_OK_PTR(buf, "malloc"))
+		return;
+
+	for (i = 0; i < nr_cpus; i++)
+		buf[i] = i;
+	sum = (nr_cpus - 1) * nr_cpus / 2;
+
+	skel = test_map_lookup_percpu_elem__open();
+	if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open"))
+		goto exit;
+
+	skel->rodata->my_pid = getpid();
+	skel->rodata->nr_cpus = nr_cpus;
+
+	ret = test_map_lookup_percpu_elem__load(skel);
+	if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__load"))
+		goto cleanup;
+
+	ret = test_map_lookup_percpu_elem__attach(skel);
+	if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach"))
+		goto cleanup;
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0);
+	ASSERT_OK(ret, "percpu_array_map update");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0);
+	ASSERT_OK(ret, "percpu_hash_map update");
+
+	ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0);
+	ASSERT_OK(ret, "percpu_lru_hash_map update");
+
+	syscall(__NR_getuid);
+
+	test_map_lookup_percpu_elem__detach(skel);
+
+	ASSERT_EQ(skel->bss->percpu_array_elem_sum, sum, "percpu_array lookup percpu elem");
+	ASSERT_EQ(skel->bss->percpu_hash_elem_sum, sum, "percpu_hash lookup percpu elem");
+	ASSERT_EQ(skel->bss->percpu_lru_hash_elem_sum, sum, "percpu_lru_hash lookup percpu elem");
+
+cleanup:
+	test_map_lookup_percpu_elem__destroy(skel);
+exit:
+	free(buf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
new file mode 100644
index 000000000000..59f08d6d1d53
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "mptcp_sock.skel.h"
+
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX	16
+#endif
+
+/* Value type looked up from socket_storage_map for the client socket.
+ * NOTE(review): presumably must match the layout used by the BPF program in
+ * mptcp_sock - confirm before changing.
+ */
+struct mptcp_storage {
+	__u32 invoked;
+	__u32 is_mptcp;
+	struct sock *sk;
+	__u32 token;
+	struct sock *first;
+	char ca_name[TCP_CA_NAME_MAX];
+};
+
+/* Look up the storage for @client_fd and check it describes a plain TCP
+ * socket (invoked == 1, is_mptcp == 0). Returns 0 on success, the lookup
+ * error or the number of failed checks otherwise.
+ */
+static int verify_tsk(int map_fd, int client_fd)
+{
+	int err, cfd = client_fd;
+	struct mptcp_storage val;
+
+	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		return err;
+
+	if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+		err++;
+
+	if (!ASSERT_EQ(val.is_mptcp, 0, "unexpected is_mptcp"))
+		err++;
+
+	return err;
+}
+
+/* Read /proc/sys/net/ipv4/tcp_congestion_control into @ca_name, which must
+ * hold TCP_CA_NAME_MAX bytes, and strip a trailing newline. @ca_name is left
+ * as an empty string when the file cannot be opened or read.
+ */
+static void get_msk_ca_name(char ca_name[])
+{
+	ssize_t len;
+	int fd;
+
+	/* verify_msk() compares the buffer unconditionally, so make sure it
+	 * never holds uninitialized data
+	 */
+	ca_name[0] = '\0';
+
+	fd = open("/proc/sys/net/ipv4/tcp_congestion_control", O_RDONLY);
+	if (!ASSERT_GE(fd, 0, "failed to open tcp_congestion_control"))
+		return;
+
+	/* read() returns -1 on failure, so len has to be signed: with an
+	 * unsigned len the check below passes and ca_name[len - 1] is read
+	 * far out of bounds
+	 */
+	len = read(fd, ca_name, TCP_CA_NAME_MAX);
+	if (!ASSERT_GT(len, 0, "failed to read ca_name"))
+		goto err;
+
+	if (len > 0 && ca_name[len - 1] == '\n')
+		ca_name[len - 1] = '\0';
+
+err:
+	close(fd);
+}
+
+/* Look up the storage for @client_fd and check it describes an MPTCP socket:
+ * invoked == 1, is_mptcp == 1, token == @token, first == sk and ca_name equal
+ * to the name read from tcp_congestion_control. Returns 0 on success, the
+ * lookup error or the number of failed checks otherwise (-1 if @token is 0).
+ */
+static int verify_msk(int map_fd, int client_fd, __u32 token)
+{
+	char ca_name[TCP_CA_NAME_MAX];
+	int err, cfd = client_fd;
+	struct mptcp_storage val;
+
+	if (!ASSERT_GT(token, 0, "invalid token"))
+		return -1;
+
+	get_msk_ca_name(ca_name);
+
+	err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+	if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+		return err;
+
+	if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+		err++;
+
+	if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp"))
+		err++;
+
+	if (!ASSERT_EQ(val.token, token, "unexpected token"))
+		err++;
+
+	if (!ASSERT_EQ(val.first, val.sk, "unexpected first"))
+		err++;
+
+	if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name"))
+		err++;
+
+	return err;
+}
+
+/* Load and attach mptcp_sock, attach its sockops program to @cgroup_fd,
+ * connect a client to @server_fd and verify the resulting socket storage.
+ * Returns 0 on success, non-zero otherwise.
+ */
+static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
+{
+	int client_fd, prog_fd, map_fd, err;
+	struct mptcp_sock *sock_skel;
+
+	sock_skel = mptcp_sock__open_and_load();
+	if (!ASSERT_OK_PTR(sock_skel, "skel_open_load"))
+		return -EIO;
+
+	err = mptcp_sock__attach(sock_skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto out;
+
+	prog_fd = bpf_program__fd(sock_skel->progs._sockops);
+	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd")) {
+		err = -EIO;
+		goto out;
+	}
+
+	map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map);
+	if (!ASSERT_GE(map_fd, 0, "bpf_map__fd")) {
+		err = -EIO;
+		goto out;
+	}
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
+		goto out;
+
+	client_fd = connect_to_fd(server_fd, 0);
+	if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+		err = -EIO;
+		goto out;
+	}
+
+	err += is_mptcp ? verify_msk(map_fd, client_fd, sock_skel->bss->token) :
+			  verify_tsk(map_fd, client_fd);
+
+	close(client_fd);
+
+out:
+	mptcp_sock__destroy(sock_skel);
+	return err;
+}
+
+/* Run the storage check against a plain TCP server and then an MPTCP server,
+ * both after joining the "/mptcp" cgroup.
+ */
+static void test_base(void)
+{
+	int server_fd, cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/mptcp");
+	if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+		return;
+
+	/* without MPTCP */
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+	if (!ASSERT_GE(server_fd, 0, "start_server"))
+		goto with_mptcp;
+
+	ASSERT_OK(run_test(cgroup_fd, server_fd, false), "run_test tcp");
+
+	close(server_fd);
+
+with_mptcp:
+	/* with MPTCP */
+	server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+	if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
+		goto close_cgroup_fd;
+
+	ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp");
+
+	close(server_fd);
+
+close_cgroup_fd:
+	close(cgroup_fd);
+}
+
+void test_mptcp(void)
+{
+	if (test__start_subtest("base"))
+		test_base();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
index 954964f0ac3d..d3915c58d0e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/netcnt.c
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -25,7 +25,7 @@ void serial_test_netcnt(void)
 	if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
 		return;
 
-	nproc = get_nprocs_conf();
+	nproc = bpf_num_possible_cpus();
 	percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
 	if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
 		goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
new file mode 100644
index 000000000000..14f2796076e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+/* Zero the error, skip and sub-success counters of @state. */
+static void clear_test_state(struct test_state *state)
+{
+	state->skip_cnt = 0;
+	state->sub_succ_cnt = 0;
+	state->error_cnt = 0;
+}
+
+/* Check the counters in env.test_state after good, skipped and failed subtests. */
+void test_prog_tests_framework(void)
+{
+	struct test_state *state = env.test_state;
+	int i;
+
+	/* Every failed ASSERT below returns right away, because the test
+	 * state is cleared after each dummy subtest.
+	 */
+
+	/* a subtest that does nothing: no skips, no errors, one subtest */
+	if (test__start_subtest("test_good_subtest"))
+		test__end_subtest();
+	if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check") ||
+	    !ASSERT_EQ(state->error_cnt, 0, "error_cnt_check") ||
+	    !ASSERT_EQ(state->subtest_num, 1, "subtest_num_check"))
+		return;
+	clear_test_state(state);
+
+	/* two skipped subtests in a row: both must be counted */
+	for (i = 0; i < 2; i++) {
+		if (test__start_subtest("test_skip_subtest")) {
+			test__skip();
+			test__end_subtest();
+		}
+	}
+	if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check") ||
+	    !ASSERT_EQ(state->subtest_num, 3, "subtest_num_check"))
+		return;
+	clear_test_state(state);
+
+	/* a failing subtest: exactly one error must be recorded */
+	if (test__start_subtest("test_fail_subtest")) {
+		test__fail();
+		test__end_subtest();
+	}
+	if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check") ||
+	    !ASSERT_EQ(state->subtest_num, 4, "subtest_num_check"))
+		return;
+	clear_test_state(state);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 873323fb18ba..739d2ea6ca55 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -1,21 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
-static void toggle_object_autoload_progs(const struct bpf_object *obj,
-					 const char *name_load)
-{
-	struct bpf_program *prog;
-
-	bpf_object__for_each_program(prog, obj) {
-		const char *name = bpf_program__name(prog);
-
-		if (!strcmp(name_load, name))
-			bpf_program__set_autoload(prog, true);
-		else
-			bpf_program__set_autoload(prog, false);
-	}
-}
-
 void test_reference_tracking(void)
 {
 	const char *file = "test_sk_lookup_kern.o";
@@ -39,6 +24,7 @@ void test_reference_tracking(void)
 		goto cleanup;
 
 	bpf_object__for_each_program(prog, obj_iter) {
+		struct bpf_program *p;
 		const char *name;
 
 		name = bpf_program__name(prog);
@@ -49,7 +35,12 @@ void test_reference_tracking(void)
 		if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 			goto cleanup;
 
-		toggle_object_autoload_progs(obj, name);
+		/* all programs are not loaded by default, so just set
+		 * autoload to true for the single prog under test
+		 */
+		p = bpf_object__find_program_by_name(obj, name);
+		bpf_program__set_autoload(p, true);
+
 		/* Expect verifier failure if test name has 'err' */
 		if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
 			libbpf_print_fn_t old_print_fn;
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index e945195b24c9..eb5f7f5aa81a 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -50,18 +50,6 @@ void test_ringbuf_multi(void)
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
-	err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
-
-	err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
-
-	err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size);
-	if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
-		goto cleanup;
-
 	proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
 	if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
new file mode 100644
index 000000000000..d7f83c0a40a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_load_bytes.skel.h"
+
+/* Run skb_process with pkt_v4 as input for two values of load_offset and
+ * check the test_result left in .bss: -EFAULT for offset (u32)-1 and 0 for
+ * offset 10.
+ */
+void test_skb_load_bytes(void)
+{
+	struct __sk_buff ctx = {};
+	LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+		.ctx_in = &ctx,
+		.ctx_size_in = sizeof(ctx),
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+	);
+	struct skb_load_bytes *skel;
+	int fd, ret, result;
+
+	skel = skb_load_bytes__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		return;
+
+	fd = bpf_program__fd(skel->progs.skb_process);
+	if (!ASSERT_GE(fd, 0, "prog_fd"))
+		goto out;
+
+	/* offset (u32)-1 is expected to be refused with -EFAULT */
+	skel->bss->load_offset = (uint32_t)(-1);
+	ret = bpf_prog_test_run_opts(fd, &run_opts);
+	if (!ASSERT_OK(ret, "bpf_prog_test_run_opts"))
+		goto out;
+	result = skel->bss->test_result;
+	if (!ASSERT_EQ(result, -EFAULT, "offset -1"))
+		goto out;
+
+	/* offset 10 is expected to succeed */
+	skel->bss->load_offset = (uint32_t)10;
+	ret = bpf_prog_test_run_opts(fd, &run_opts);
+	if (!ASSERT_OK(ret, "bpf_prog_test_run_opts"))
+		goto out;
+	result = skel->bss->test_result;
+	ASSERT_EQ(result, 0, "offset 10");
+
+out:
+	skb_load_bytes__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 394ebfc3bbf3..4be6fdb78c6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -83,8 +83,6 @@ cleanup:
 	test_snprintf__destroy(skel);
 }
 
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
 /* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
 static int load_single_snprintf(char *fmt)
 {
@@ -95,7 +93,7 @@ static int load_single_snprintf(char *fmt)
 	if (!skel)
 		return -EINVAL;
 
-	memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+	memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10));
 
 	ret = test_snprintf_single__load(skel);
 	test_snprintf_single__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index e8399ae50e77..9ad09a6c538a 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -8,7 +8,7 @@ void test_stacktrace_build_id(void)
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	struct test_stacktrace_build_id *skel;
 	int err, stack_trace_len;
-	__u32 key, previous_key, val, duration = 0;
+	__u32 key, prev_key, val, duration = 0;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -58,7 +58,7 @@ retry:
 		  "err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
 	if (CHECK(err, "get_next_key from stackmap",
 		  "err %d, errno %d\n", err, errno))
 		goto cleanup;
@@ -79,8 +79,8 @@ retry:
 				if (strstr(buf, build_id) != NULL)
 					build_id_matches = 1;
 			}
-		previous_key = key;
-	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+		prev_key = key;
+	} while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
 
 	/* stack_map_get_build_id_offset() is racy and sometimes can return
 	 * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f45a1d7b0a28..f4ea1a215ce4 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -27,7 +27,7 @@ void test_stacktrace_build_id_nmi(void)
 		.type = PERF_TYPE_HARDWARE,
 		.config = PERF_COUNT_HW_CPU_CYCLES,
 	};
-	__u32 key, previous_key, val, duration = 0;
+	__u32 key, prev_key, val, duration = 0;
 	char buf[256];
 	int i, j;
 	struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
@@ -100,7 +100,7 @@ retry:
 		  "err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+	err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
 	if (CHECK(err, "get_next_key from stackmap",
 		  "err %d, errno %d\n", err, errno))
 		goto cleanup;
@@ -108,7 +108,8 @@ retry:
 	do {
 		char build_id[64];
 
-		err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+		err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key),
+					   id_offs, sizeof(id_offs), 0);
 		if (CHECK(err, "lookup_elem from stackmap",
 			  "err %d, errno %d\n", err, errno))
 			goto cleanup;
@@ -121,8 +122,8 @@ retry:
 				if (strstr(buf, build_id) != NULL)
 					build_id_matches = 1;
 			}
-		previous_key = key;
-	} while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+		prev_key = key;
+	} while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
 
 	/* stack_map_get_build_id_offset() is racy and sometimes can return
 	 * BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index 7ad66a247c02..958dae769c52 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -949,7 +949,6 @@ fail:
 	return -1;
 }
 
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
 enum {
 	SRC_TO_TARGET = 0,
 	TARGET_TO_SRC = 1,
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 509e21d5cb9d..b90ee47d3111 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -81,6 +81,7 @@ void test_test_global_funcs(void)
 		{ "test_global_func14.o", "reference type('FWD S') size cannot be determined" },
 		{ "test_global_func15.o", "At program exit the register R0 has value" },
 		{ "test_global_func16.o", "invalid indirect read from stack" },
+		{ "test_global_func17.o", "Caller passes invalid args into func#1" },
 	};
 	libbpf_print_fn_t old_print_fn = NULL;
 	int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
index b57a3009465f..7ddd6615b7e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
@@ -44,16 +44,12 @@ static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name,
 static void test_strncmp_ret(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err, got;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
 	bpf_program__set_autoload(skel->progs.do_strncmp, true);
 
 	err = strncmp_test__load(skel);
@@ -91,18 +87,13 @@ out:
 static void test_strncmp_bad_not_const_str_size(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
-	bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size,
-				  true);
+	bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true);
 
 	err = strncmp_test__load(skel);
 	ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
@@ -113,18 +104,13 @@ static void test_strncmp_bad_not_const_str_size(void)
 static void test_strncmp_bad_writable_target(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
-	bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target,
-				  true);
+	bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true);
 
 	err = strncmp_test__load(skel);
 	ASSERT_ERR(err, "strncmp_test load bad_writable_target");
@@ -135,18 +121,13 @@ static void test_strncmp_bad_writable_target(void)
 static void test_strncmp_bad_not_null_term_target(void)
 {
 	struct strncmp_test *skel;
-	struct bpf_program *prog;
 	int err;
 
 	skel = strncmp_test__open();
 	if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
 		return;
 
-	bpf_object__for_each_program(prog, skel->obj)
-		bpf_program__set_autoload(prog, false);
-
-	bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target,
-				  true);
+	bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true);
 
 	err = strncmp_test__load(skel);
 	ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
new file mode 100644
index 000000000000..3bba4a2a0530
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ *   The file tests BPF network tunnel implementation.
+ *
+ * Topology:
+ * ---------
+ *     root namespace   |     at_ns0 namespace
+ *                       |
+ *       -----------     |     -----------
+ *       | tnl dev |     |     | tnl dev |  (overlay network)
+ *       -----------     |     -----------
+ *       metadata-mode   |     metadata-mode
+ *        with bpf       |       with bpf
+ *                       |
+ *       ----------      |     ----------
+ *       |  veth1  | --------- |  veth0  |  (underlay network)
+ *       ----------    peer    ----------
+ *
+ *
+ *  Device Configuration
+ *  --------------------
+ *  root namespace with metadata-mode tunnel + BPF
+ *  Device names and addresses:
+ *	veth1 IP 1: 172.16.1.200, IPv6: 00::22 (underlay)
+ *		IP 2: 172.16.1.20, IPv6: 00::bb (underlay)
+ *	tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
+ *
+ *  Namespace at_ns0 with native tunnel
+ *  Device names and addresses:
+ *	veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+ *	tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
+ *
+ *
+ * End-to-end ping packet flow
+ *  ---------------------------
+ *  Most of the tests start by namespace creation, device configuration,
+ *  then ping the underlay and overlay network.  When doing 'ping 10.1.1.100'
+ *  from root namespace, the following operations happen:
+ *  1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+ *  2) Tnl device's egress BPF program is triggered and sets the tunnel metadata,
+ *     with local_ip=172.16.1.200, remote_ip=172.16.1.100. The BPF program chooses
+ *     the primary or secondary ip of veth1 as the local ip of tunnel. The
+ *     choice is made based on the value of bpf map local_ip_map.
+ *  3) Outer tunnel header is prepended and the packet is routed to veth1's egress.
+ *  4) veth0's ingress queue receives the tunneled packet at namespace at_ns0.
+ *  5) Tunnel protocol handler, ex: vxlan_rcv, decaps the packet.
+ *  6) The packet is forwarded to the overlay tnl dev.
+ */
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tunnel_kern.skel.h"
+
+#define IP4_ADDR_VETH0 "172.16.1.100"
+#define IP4_ADDR1_VETH1 "172.16.1.200"
+#define IP4_ADDR2_VETH1 "172.16.1.20"
+#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
+#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+
+#define IP6_ADDR_VETH0 "::11"
+#define IP6_ADDR1_VETH1 "::22"
+#define IP6_ADDR2_VETH1 "::bb"
+
+#define IP4_ADDR1_HEX_VETH1 0xac1001c8
+#define IP4_ADDR2_HEX_VETH1 0xac100114
+#define IP6_ADDR1_HEX_VETH1 0x22
+#define IP6_ADDR2_HEX_VETH1 0xbb
+
+#define MAC_TUNL_DEV0 "52:54:00:d9:01:00"
+#define MAC_TUNL_DEV1 "52:54:00:d9:02:00"
+
+#define VXLAN_TUNL_DEV0 "vxlan00"
+#define VXLAN_TUNL_DEV1 "vxlan11"
+#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00"
+#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11"
+
+#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
+
+#define SYS(fmt, ...)						\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		if (!ASSERT_OK(system(cmd), cmd))		\
+			goto fail;				\
+	})
+
+#define SYS_NOFAIL(fmt, ...)					\
+	({							\
+		char cmd[1024];					\
+		snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__);	\
+		system(cmd);					\
+	})
+
+static int config_device(void)
+{
+	SYS("ip netns add at_ns0");
+	SYS("ip link add veth0 type veth peer name veth1");
+	SYS("ip link set veth0 netns at_ns0");
+	SYS("ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
+	SYS("ip addr add " IP4_ADDR2_VETH1 "/24 dev veth1");
+	SYS("ip link set dev veth1 up mtu 1500");
+	SYS("ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
+	SYS("ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
+
+	return 0;
+fail:
+	return -1;
+}
+
+static void cleanup(void)
+{
+	SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0");
+	SYS_NOFAIL("ip link del veth1 2> /dev/null");
+	SYS_NOFAIL("ip link del %s 2> /dev/null", VXLAN_TUNL_DEV1);
+	SYS_NOFAIL("ip link del %s 2> /dev/null", IP6VXLAN_TUNL_DEV1);
+}
+
+static int add_vxlan_tunnel(void)
+{
+	/* at_ns0 namespace */
+	SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
+	    VXLAN_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+	    VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+	    VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
+	    IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
+
+	/* root namespace */
+	SYS("ip link add dev %s type vxlan external gbp dstport 4789",
+	    VXLAN_TUNL_DEV1);
+	SYS("ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+	SYS("ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+	SYS("ip neigh add %s lladdr %s dev %s",
+	    IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1);
+
+	return 0;
+fail:
+	return -1;
+}
+
+static void delete_vxlan_tunnel(void)
+{
+	SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+		   VXLAN_TUNL_DEV0);
+	SYS_NOFAIL("ip link delete dev %s", VXLAN_TUNL_DEV1);
+}
+
+static int add_ip6vxlan_tunnel(void)
+{
+	SYS("ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
+	    IP6_ADDR_VETH0);
+	SYS("ip netns exec at_ns0 ip link set dev veth0 up");
+	SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
+	SYS("ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
+	SYS("ip link set dev veth1 up");
+
+	/* at_ns0 namespace */
+	SYS("ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
+	    IP6VXLAN_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip addr add dev %s %s/24",
+	    IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+	SYS("ip netns exec at_ns0 ip link set dev %s address %s up",
+	    IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+
+	/* root namespace */
+	SYS("ip link add dev %s type vxlan external dstport 4789",
+	    IP6VXLAN_TUNL_DEV1);
+	SYS("ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+	SYS("ip link set dev %s address %s up",
+	    IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+
+	return 0;
+fail:
+	return -1;
+}
+
+static void delete_ip6vxlan_tunnel(void)
+{
+	SYS_NOFAIL("ip netns exec at_ns0 ip -6 addr delete %s/96 dev veth0",
+		   IP6_ADDR_VETH0);
+	SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR1_VETH1);
+	SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR2_VETH1);
+	SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+		   IP6VXLAN_TUNL_DEV0);
+	SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1);
+}
+
+static int test_ping(int family, const char *addr)
+{
+	SYS("%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
+	return 0;
+fail:
+	return -1;
+}
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+{
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+			    .priority = 1, .prog_fd = igr_fd);
+	DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+			    .priority = 1, .prog_fd = egr_fd);
+	int ret;
+
+	ret = bpf_tc_hook_create(hook);
+	if (!ASSERT_OK(ret, "create tc hook"))
+		return ret;
+
+	if (igr_fd >= 0) {
+		hook->attach_point = BPF_TC_INGRESS;
+		ret = bpf_tc_attach(hook, &opts1);
+		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+			bpf_tc_hook_destroy(hook);
+			return ret;
+		}
+	}
+
+	if (egr_fd >= 0) {
+		hook->attach_point = BPF_TC_EGRESS;
+		ret = bpf_tc_attach(hook, &opts2);
+		if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+			bpf_tc_hook_destroy(hook);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void test_vxlan_tunnel(void)
+{
+	struct test_tunnel_kern *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	int set_src_prog_fd, get_src_prog_fd, set_dst_prog_fd;
+	int local_ip_map_fd, key = 0, ifindex = -1;
+	uint local_ip;
+	int err;
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	/* add vxlan tunnel */
+	err = add_vxlan_tunnel();
+	if (!ASSERT_OK(err, "add vxlan tunnel"))
+		goto done;
+
+	/* load and attach bpf prog to tunnel dev tc hook point */
+	skel = test_tunnel_kern__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+		goto done;
+	ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
+	if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
+	set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
+	if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+		goto done;
+
+	/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+	nstoken = open_netns("at_ns0");
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
+		goto done;
+	ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
+	if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
+	if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+		goto done;
+	close_netns(nstoken);
+	nstoken = NULL;
+
+	/* use veth1 ip 2 as tunnel source ip */
+	local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+	if (!ASSERT_GE(local_ip_map_fd, 0, "bpf_map__fd"))
+		goto done;
+	local_ip = IP4_ADDR2_HEX_VETH1;
+	err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+	if (!ASSERT_OK(err, "update bpf local_ip_map"))
+		goto done;
+
+	/* ping test */
+	err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+	if (!ASSERT_OK(err, "test_ping"))
+		goto done;
+
+done:
+	/* leave at_ns0 first if we bailed out inside it, then delete vxlan tunnel */
+	if (nstoken)
+		close_netns(nstoken);
+	delete_vxlan_tunnel();
+	/* local_ip_map_fd belongs to skel; destroying skel closes it, so no close() here */
+	if (skel)
+		test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6vxlan_tunnel(void)
+{
+	struct test_tunnel_kern *skel = NULL;
+	struct nstoken *nstoken = NULL;
+	int set_src_prog_fd, get_src_prog_fd, set_dst_prog_fd;
+	int local_ip_map_fd, key = 0, ifindex = -1;
+	uint local_ip;
+	int err;
+	DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+			    .attach_point = BPF_TC_INGRESS);
+
+	/* add ipv6 vxlan tunnel */
+	err = add_ip6vxlan_tunnel();
+	if (!ASSERT_OK(err, "add_ip6vxlan_tunnel"))
+		goto done;
+
+	/* load and attach bpf prog to tunnel dev tc hook point */
+	skel = test_tunnel_kern__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+		goto done;
+	ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
+	if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
+	set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
+	if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+		goto done;
+
+	/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+	nstoken = open_netns("at_ns0");
+	if (!ASSERT_OK_PTR(nstoken, "setns src"))
+		goto done;
+	ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
+	if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
+		goto done;
+	tc_hook.ifindex = ifindex;
+	set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
+	if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+		goto done;
+	if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+		goto done;
+	close_netns(nstoken);
+	nstoken = NULL;
+
+	/* use veth1 ip 2 as tunnel source ip */
+	local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+	if (!ASSERT_GE(local_ip_map_fd, 0, "get local_ip_map fd"))
+		goto done;
+	local_ip = IP6_ADDR2_HEX_VETH1;
+	err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+	if (!ASSERT_OK(err, "update bpf local_ip_map"))
+		goto done;
+
+	/* ping test */
+	err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+	if (!ASSERT_OK(err, "test_ping"))
+		goto done;
+
+done:
+	/* leave at_ns0 first if we bailed out inside it, then delete ipv6 vxlan tunnel */
+	if (nstoken)
+		close_netns(nstoken);
+	delete_ip6vxlan_tunnel();
+	/* local_ip_map_fd belongs to skel; destroying skel closes it, so no close() here */
+	if (skel)
+		test_tunnel_kern__destroy(skel);
+}
+
+#define RUN_TEST(name)							\
+	({								\
+		if (test__start_subtest(#name)) {			\
+			test_ ## name();				\
+		}							\
+	})
+
+static void *test_tunnel_run_tests(void *arg)
+{
+	cleanup();
+	/* config_device() reports its own failure; don't run subtests on a broken topology */
+	if (!config_device()) {
+		RUN_TEST(vxlan_tunnel);
+		RUN_TEST(ip6vxlan_tunnel);
+	}
+	cleanup();
+
+	return NULL;
+}
+
+void serial_test_tunnel(void)
+{
+	pthread_t test_thread;
+	int err;
+
+	/* Run the tests in their own thread to isolate the namespace changes
+	 * so they do not affect the environment of other tests.
+	 * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+	 */
+	err = pthread_create(&test_thread, NULL, &test_tunnel_run_tests, NULL);
+	if (ASSERT_OK(err, "pthread_create"))
+		ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 2ee5f5ae11d4..9ff7843909e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -35,7 +35,7 @@ static int timer_mim(struct timer_mim *timer_skel)
 	ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
 
 	close(bpf_map__fd(timer_skel->maps.inner_htab));
-	err = bpf_map_delete_elem(bpf_map__fd(timer_skel->maps.outer_arr), &key1);
+	err = bpf_map__delete_elem(timer_skel->maps.outer_arr, &key1, sizeof(key1), 0);
 	ASSERT_EQ(err, 0, "delete inner map");
 
 	/* check that timer_cb[12] are no longer running */
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 9c795ee52b7b..b0acbda6dbf5 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -1,126 +1,94 @@
 // SPDX-License-Identifier: GPL-2.0-only
 #define _GNU_SOURCE
-#include <sched.h>
-#include <sys/prctl.h>
 #include <test_progs.h>
 
 #define MAX_TRAMP_PROGS 38
 
 struct inst {
 	struct bpf_object *obj;
-	struct bpf_link   *link_fentry;
-	struct bpf_link   *link_fexit;
+	struct bpf_link   *link;
 };
 
-static int test_task_rename(void)
-{
-	int fd, duration = 0, err;
-	char buf[] = "test_overhead";
-
-	fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
-	if (CHECK(fd < 0, "open /proc", "err %d", errno))
-		return -1;
-	err = write(fd, buf, sizeof(buf));
-	if (err < 0) {
-		CHECK(err < 0, "task rename", "err %d", errno);
-		close(fd);
-		return -1;
-	}
-	close(fd);
-	return 0;
-}
-
-static struct bpf_link *load(struct bpf_object *obj, const char *name)
+static struct bpf_program *load_prog(char *file, char *name, struct inst *inst)
 {
+	struct bpf_object *obj;
 	struct bpf_program *prog;
-	int duration = 0;
+	int err;
+
+	obj = bpf_object__open_file(file, NULL);
+	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+		return NULL;
+
+	inst->obj = obj;
+
+	err = bpf_object__load(obj);
+	if (!ASSERT_OK(err, "obj_load"))
+		return NULL;
 
 	prog = bpf_object__find_program_by_name(obj, name);
-	if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
-		return ERR_PTR(-EINVAL);
-	return bpf_program__attach_trace(prog);
+	if (!ASSERT_OK_PTR(prog, "obj_find_prog"))
+		return NULL;
+
+	return prog;
 }
 
 /* TODO: use different target function to run in concurrent mode */
 void serial_test_trampoline_count(void)
 {
-	const char *fentry_name = "prog1";
-	const char *fexit_name = "prog2";
-	const char *object = "test_trampoline_count.o";
-	struct inst inst[MAX_TRAMP_PROGS] = {};
-	int err, i = 0, duration = 0;
-	struct bpf_object *obj;
+	char *file = "test_trampoline_count.o";
+	char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
+	struct inst inst[MAX_TRAMP_PROGS + 1] = {};
+	struct bpf_program *prog;
 	struct bpf_link *link;
-	char comm[16] = {};
+	int prog_fd, err, i;
+	LIBBPF_OPTS(bpf_test_run_opts, opts);
 
 	/* attach 'allowed' trampoline programs */
 	for (i = 0; i < MAX_TRAMP_PROGS; i++) {
-		obj = bpf_object__open_file(object, NULL);
-		if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
-			obj = NULL;
+		prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]);
+		if (!prog)
 			goto cleanup;
-		}
 
-		err = bpf_object__load(obj);
-		if (CHECK(err, "obj_load", "err %d\n", err))
+		link = bpf_program__attach(prog);
+		if (!ASSERT_OK_PTR(link, "attach_prog"))
 			goto cleanup;
-		inst[i].obj = obj;
-		obj = NULL;
-
-		if (rand() % 2) {
-			link = load(inst[i].obj, fentry_name);
-			if (!ASSERT_OK_PTR(link, "attach_prog")) {
-				link = NULL;
-				goto cleanup;
-			}
-			inst[i].link_fentry = link;
-		} else {
-			link = load(inst[i].obj, fexit_name);
-			if (!ASSERT_OK_PTR(link, "attach_prog")) {
-				link = NULL;
-				goto cleanup;
-			}
-			inst[i].link_fexit = link;
-		}
+
+		inst[i].link = link;
 	}
 
 	/* and try 1 extra.. */
-	obj = bpf_object__open_file(object, NULL);
-	if (!ASSERT_OK_PTR(obj, "obj_open_file")) {
-		obj = NULL;
+	prog = load_prog(file, "fmod_ret_test", &inst[i]);
+	if (!prog)
 		goto cleanup;
-	}
-
-	err = bpf_object__load(obj);
-	if (CHECK(err, "obj_load", "err %d\n", err))
-		goto cleanup_extra;
 
 	/* ..that needs to fail */
-	link = load(obj, fentry_name);
-	err = libbpf_get_error(link);
-	if (!ASSERT_ERR_PTR(link, "cannot attach over the limit")) {
-		bpf_link__destroy(link);
-		goto cleanup_extra;
+	link = bpf_program__attach(prog);
+	if (!ASSERT_ERR_PTR(link, "attach_prog")) {
+		inst[i].link = link;
+		goto cleanup;
 	}
 
 	/* with E2BIG error */
-	ASSERT_EQ(err, -E2BIG, "proper error check");
-	ASSERT_EQ(link, NULL, "ptr_is_null");
+	if (!ASSERT_EQ(libbpf_get_error(link), -E2BIG, "E2BIG"))
+		goto cleanup;
+	if (!ASSERT_EQ(link, NULL, "ptr_is_null"))
+		goto cleanup;
 
 	/* and finaly execute the probe */
-	if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
-		goto cleanup_extra;
-	CHECK_FAIL(test_task_rename());
-	CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+	prog_fd = bpf_program__fd(prog);
+	if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
+		goto cleanup;
+
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+		goto cleanup;
+
+	ASSERT_EQ(opts.retval & 0xffff, 4, "bpf_modify_return_test.result");
+	ASSERT_EQ(opts.retval >> 16, 1, "bpf_modify_return_test.side_effect");
 
-cleanup_extra:
-	bpf_object__close(obj);
 cleanup:
-	if (i >= MAX_TRAMP_PROGS)
-		i = MAX_TRAMP_PROGS - 1;
 	for (; i >= 0; i--) {
-		bpf_link__destroy(inst[i].link_fentry);
-		bpf_link__destroy(inst[i].link_fexit);
+		bpf_link__destroy(inst[i].link);
 		bpf_object__close(inst[i].obj);
 	}
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..1ed3cc2092db
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_unpriv_bpf_disabled.skel.h"
+
+#include "cap_helpers.h"
+
+/* Using CAP_LAST_CAP is risky here, since it can get pulled in from
+ * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result
+ * CAP_BPF would not be included in ALL_CAPS.  Instead use CAP_BPF as
+ * we know its value is correct since it is explicitly defined in
+ * cap_helpers.h.
+ */
+#define ALL_CAPS	((2ULL << CAP_BPF) - 1)
+
+#define PINPATH		"/sys/fs/bpf/unpriv_bpf_disabled_"
+#define NUM_MAPS	7
+
+static __u32 got_perfbuf_val;
+static __u32 got_ringbuf_val;
+
+static int process_ringbuf(void *ctx, void *data, size_t len)
+{
+	if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid"))
+		got_ringbuf_val = *(__u32 *)data;
+	return 0;
+}
+
+static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len)
+{
+	if (ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid"))
+		got_perfbuf_val = *(__u32 *)data;
+}
+
+static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val)
+{
+	int ret = 0;
+	FILE *fp;
+
+	fp = fopen(sysctl_path, "r+");
+	if (!fp)
+		return -errno;
+	if (old_val && fscanf(fp, "%s", old_val) <= 0) {
+		ret = -ENOENT;
+	} else if (!old_val || strcmp(old_val, new_val) != 0) {
+		fseek(fp, 0, SEEK_SET);
+		if (fprintf(fp, "%s", new_val) < 0)
+			ret = -errno;
+	}
+	if (fclose(fp) && !ret)	/* buffered write errors (e.g. EPERM) only surface here */
+		ret = -errno;
+	return ret;
+}
+
+static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel,
+					      __u32 prog_id, int prog_fd, int perf_fd,
+					      char **map_paths, int *map_fds)
+{
+	struct perf_buffer *perfbuf = NULL;
+	struct ring_buffer *ringbuf = NULL;
+	int i, nr_cpus, link_fd = -1;
+
+	nr_cpus = bpf_num_possible_cpus();
+
+	skel->bss->perfbuf_val = 1;
+	skel->bss->ringbuf_val = 2;
+
+	/* Positive tests for unprivileged BPF disabled. Verify we can
+	 * - retrieve and interact with pinned maps;
+	 * - set up and interact with perf buffer;
+	 * - set up and interact with ring buffer;
+	 * - create a link
+	 */
+	perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL,
+				   NULL);
+	if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new"))
+		goto cleanup;
+
+	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL);
+	if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+		goto cleanup;
+
+	/* trigger & validate perf event, ringbuf output */
+	usleep(1);
+
+	ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll");
+	ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val");
+	ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume");
+	ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val");
+
+	for (i = 0; i < NUM_MAPS; i++) {
+		map_fds[i] = bpf_obj_get(map_paths[i]);
+		if (!ASSERT_GT(map_fds[i], -1, "obj_get"))
+			goto cleanup;
+	}
+
+	for (i = 0; i < NUM_MAPS; i++) {
+		bool prog_array = strstr(map_paths[i], "prog_array") != NULL;
+		bool array = strstr(map_paths[i], "array") != NULL;
+		bool buf = strstr(map_paths[i], "buf") != NULL;
+		__u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus];
+		__u32 expected_val = 1;
+		int j;
+
+		/* skip ringbuf, perfbuf */
+		if (buf)
+			continue;
+
+		for (j = 0; j < nr_cpus; j++)
+			vals[j] = expected_val;
+
+		if (prog_array) {
+			/* need valid prog array value */
+			vals[0] = prog_fd;
+			/* prog array lookup returns prog id, not fd */
+			expected_val = prog_id;
+		}
+		ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem");
+		ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem");
+		ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values");
+		if (!array)
+			ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem");
+	}
+
+	link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd,
+				  BPF_PERF_EVENT, NULL);
+	ASSERT_GT(link_fd, 0, "link_create");
+
+cleanup:
+	if (link_fd >= 0)	/* still -1 (or an error) when no link was created */
+		close(link_fd);
+	if (perfbuf)
+		perf_buffer__free(perfbuf);
+	if (ringbuf)
+		ring_buffer__free(ringbuf);
+}
+
+static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel,
+					      __u32 prog_id, int prog_fd, int perf_fd,
+					      char **map_paths, int *map_fds)
+{
+	const struct bpf_insn prog_insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
+	LIBBPF_OPTS(bpf_prog_load_opts, load_opts);
+	struct bpf_map_info map_info = {};
+	__u32 map_info_len = sizeof(map_info);
+	struct bpf_link_info link_info = {};
+	__u32 link_info_len = sizeof(link_info);
+	struct btf *btf = NULL;
+	__u32 attach_flags = 0;
+	__u32 prog_ids[3] = {};
+	__u32 prog_cnt = 3;
+	__u32 next;
+	int i;
+
+	/* Negative tests for unprivileged BPF disabled.  Verify we cannot
+	 * - load BPF programs;
+	 * - create BPF maps;
+	 * - get a prog/map/link fd by id;
+	 * - get next prog/map/link id
+	 * - query prog
+	 * - BTF load
+	 */
+	ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL",
+				prog_insns, prog_insn_cnt, &load_opts),
+		  -EPERM, "prog_load_fails");
+
+	for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++)
+		ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
+			  -EPERM, "map_create_fails");
+
+	ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails");
+	ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails");
+	ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails");
+
+	if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
+		      "obj_get_info_by_fd")) {
+		ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails");
+		ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM,
+			  "map_get_next_id_fails");
+	}
+	ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails");
+
+	if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
+					     &link_info, &link_info_len),
+		      "obj_get_info_by_fd")) {
+		ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails");
+		ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM,
+			  "link_get_next_id_fails");
+	}
+	ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails");
+
+	ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids,
+				 &prog_cnt), -EPERM, "prog_query_fails");
+
+	btf = btf__new_empty();
+	if (ASSERT_OK_PTR(btf, "empty_btf") &&
+	    ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) {
+		const void *raw_btf_data;
+		__u32 raw_btf_size;
+
+		raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+		if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+			ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM,
+				  "bpf_btf_load_fails");
+	}
+	btf__free(btf);
+}
+
+void test_unpriv_bpf_disabled(void)
+{
+	char *map_paths[NUM_MAPS] = {	PINPATH	"array",
+					PINPATH "percpu_array",
+					PINPATH "hash",
+					PINPATH "percpu_hash",
+					PINPATH "perfbuf",
+					PINPATH "ringbuf",
+					PINPATH "prog_array" };
+	int map_fds[NUM_MAPS];
+	struct test_unpriv_bpf_disabled *skel;
+	char unprivileged_bpf_disabled_orig[32] = {};
+	char perf_event_paranoid_orig[32] = {};
+	struct bpf_prog_info prog_info = {};
+	__u32 prog_info_len = sizeof(prog_info);
+	struct perf_event_attr attr = {};
+	int prog_fd, perf_fd = -1, i, ret;
+	__u64 save_caps = 0;
+	__u32 prog_id;
+
+	skel = test_unpriv_bpf_disabled__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	skel->bss->test_pid = getpid();
+
+	map_fds[0] = bpf_map__fd(skel->maps.array);
+	map_fds[1] = bpf_map__fd(skel->maps.percpu_array);
+	map_fds[2] = bpf_map__fd(skel->maps.hash);
+	map_fds[3] = bpf_map__fd(skel->maps.percpu_hash);
+	map_fds[4] = bpf_map__fd(skel->maps.perfbuf);
+	map_fds[5] = bpf_map__fd(skel->maps.ringbuf);
+	map_fds[6] = bpf_map__fd(skel->maps.prog_array);
+
+	for (i = 0; i < NUM_MAPS; i++)
+		ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd");
+
+	/* allow user without caps to use perf events */
+	if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig,
+				  "-1"),
+		       "set_perf_event_paranoid"))
+		goto cleanup;
+	/* ensure unprivileged bpf disabled is set */
+	ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled",
+			 unprivileged_bpf_disabled_orig, "2");
+	if (ret == -EPERM) {
+		/* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */
+		if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"),
+			       "unprivileged_bpf_disabled_on"))
+			goto cleanup;
+	} else {
+		if (!ASSERT_OK(ret, "set unprivileged_bpf_disabled"))
+			goto cleanup;
+	}
+
+	prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter);
+	ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
+		  "obj_get_info_by_fd");
+	prog_id = prog_info.id;
+	ASSERT_GT(prog_id, 0, "valid_prog_id");
+
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.config = PERF_COUNT_SW_CPU_CLOCK;
+	attr.freq = 1;
+	attr.sample_freq = 1000;
+	perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+	if (!ASSERT_GE(perf_fd, 0, "perf_fd"))
+		goto cleanup;
+
+	if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps"))
+		goto cleanup;
+
+	if (test__start_subtest("unpriv_bpf_disabled_positive"))
+		test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths,
+						  map_fds);
+
+	if (test__start_subtest("unpriv_bpf_disabled_negative"))
+		test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths,
+						  map_fds);
+
+cleanup:
+	close(perf_fd);
+	if (save_caps)
+		cap_enable_effective(save_caps, NULL);
+	if (strlen(perf_event_paranoid_orig) > 0)
+		sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig);
+	if (strlen(unprivileged_bpf_disabled_orig) > 0)
+		sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL,
+			   unprivileged_bpf_disabled_orig);
+	for (i = 0; i < NUM_MAPS; i++)
+		unlink(map_paths[i]);
+	test_unpriv_bpf_disabled__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
new file mode 100644
index 000000000000..35b87c7ba5be
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include "test_uprobe_autoattach.skel.h"
+
+/* uprobe attach point */
+static noinline int autoattach_trigger_func(int arg)
+{
+	asm volatile ("");
+	return arg + 1;
+}
+
+void test_uprobe_autoattach(void)
+{
+	struct test_uprobe_autoattach *skel;
+	int trigger_val = 100, trigger_ret;
+	size_t malloc_sz = 1;
+	char *mem;
+
+	skel = test_uprobe_autoattach__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	skel->bss->test_pid = getpid();
+
+	/* trigger & validate uprobe & uretprobe */
+	trigger_ret = autoattach_trigger_func(trigger_val);
+
+	/* trigger & validate shared library u[ret]probes attached by name;
+	 * test_pid is already set above, so it is not stored a second time
+	 */
+	mem = malloc(malloc_sz);
+
+	ASSERT_EQ(skel->bss->uprobe_byname_parm1, trigger_val, "check_uprobe_byname_parm1");
+	ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
+	ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+	ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
+	ASSERT_EQ(skel->bss->uprobe_byname2_parm1, malloc_sz, "check_uprobe_byname2_parm1");
+	ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_rc, mem, "check_uretprobe_byname2_rc");
+	ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
+
+	free(mem);
+cleanup:
+	test_uprobe_autoattach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
new file mode 100644
index 000000000000..5f733d50b0d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "../sdt.h"
+
+#include "test_usdt.skel.h"
+#include "test_urandom_usdt.skel.h"
+
+int lets_test_this(int);
+
+static volatile int idx = 2;	/* used as nums[idx] in the usdt12 argument list */
+static volatile __u64 bla = 0xFEDCBA9876543210ULL;	/* passed by value and by address */
+static volatile short nums[] = {-1, -2, -3, -4};	/* 4 entries: trigger_func(3) reads nums[3] */
+
+static volatile struct {
+	int x;
+	signed char y;	/* t1.y is the last usdt12 argument */
+} t1 = { 1, -127 };
+
+#define SEC(name) __attribute__((section(name), used))
+/* semaphores placed in ".probes"; trigger_func() skips a USDT whose semaphore is zero */
+unsigned short test_usdt0_semaphore SEC(".probes");
+unsigned short test_usdt3_semaphore SEC(".probes");
+unsigned short test_usdt12_semaphore SEC(".probes");
+
+static void __always_inline trigger_func(int x) {
+	long y = 42;
+	/* fire each USDT only if its semaphore is non-zero; x must be a valid index into nums[] */
+	if (test_usdt0_semaphore)
+		STAP_PROBE(test, usdt0);	/* no arguments */
+	if (test_usdt3_semaphore)
+		STAP_PROBE3(test, usdt3, x, y, &bla);	/* int, long and pointer arguments */
+	if (test_usdt12_semaphore) {
+		STAP_PROBE12(test, usdt12,
+			     x, x + 1, y, x + y, 5,
+			     y / 7, bla, &bla, -9, nums[x],
+			     nums[idx], t1.y);
+	}
+}
+
+static void subtest_basic_usdt(void)
+{
+	LIBBPF_OPTS(bpf_usdt_opts, opts);
+	struct test_usdt *skel;
+	struct test_usdt__bss *bss;
+	int err;
+	/* auto-attach the skeleton, attach usdt0 by hand, then check args/cookies seen by BPF */
+	skel = test_usdt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bss = skel->bss;
+	bss->my_pid = getpid();	/* presumably lets the BPF side filter on our PID */
+
+	err = test_usdt__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* usdt0 won't be auto-attached */
+	opts.usdt_cookie = 0xcafedeadbeeffeed;
+	skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
+						     0 /*self*/, "/proc/self/exe",
+						     "test", "usdt0", &opts);
+	if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
+		goto cleanup;
+
+	trigger_func(1);	/* first inlined copy, x = 1 */
+
+	ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
+	ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
+	ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+
+	ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+	ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
+	ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+
+	/* auto-attached usdt3 gets default zero cookie value */
+	ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
+	ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+	ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+	ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
+	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+	/* auto-attached usdt12 gets default zero cookie value */
+	ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
+	ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");
+
+	ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
+	ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
+	ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
+	ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
+	ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
+	ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
+	ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
+	ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
+	ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
+	ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
+	ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
+	ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+
+	/* trigger_func() is marked __always_inline, so USDT invocations will be
+	 * inlined in two different places, meaning that each USDT will have
+	 * at least 2 different places to be attached to. This verifies that
+	 * bpf_program__attach_usdt() handles this properly and attaches to
+	 * all possible places of USDT invocation.
+	 */
+	trigger_func(2);
+
+	ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
+	ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
+	ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+
+	/* only check values that depend on trigger_func()'s input value */
+	ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
+
+	ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
+	ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
+	ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
+	ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");
+
+	/* detach and re-attach usdt3 */
+	bpf_link__destroy(skel->links.usdt3);
+
+	opts.usdt_cookie = 0xBADC00C51E;
+	skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
+						     "/proc/self/exe", "test", "usdt3", &opts);
+	if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
+		goto cleanup;
+
+	trigger_func(3);	/* third inlined copy, x = 3; usdt12 is still attached here */
+
+	ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+	/* this time usdt3 has custom cookie */
+	ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
+	ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+	ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+	ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+	ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
+	ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+	ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+cleanup:
+	test_usdt__destroy(skel);
+}
+
+unsigned short test_usdt_100_semaphore SEC(".probes");
+unsigned short test_usdt_300_semaphore SEC(".probes");
+unsigned short test_usdt_400_semaphore SEC(".probes");
+/* R10(F, X) expands to F(X+0); ... F(X+9); and R100(F, X) to ten R10s, i.e. 100 calls */
+#define R10(F, X)  F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
+		   F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
+#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
+		   R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);
+
+/* get exactly 100 inlined usdt_100 sites: f100() is always inlined into its __weak caller */
+static void __always_inline f100(int x)
+{
+	STAP_PROBE1(test, usdt_100, x);
+}
+
+__weak void trigger_100_usdts(void)
+{
+	R100(f100, 0);	/* f100(0) ... f100(99) */
+}
+
+/* we shouldn't be able to attach to test:usdt_300 USDT as we don't have as
+ * many slots for specs. It's important that each STAP_PROBE1() invocation
+ * (after unrolling) gets a different arg spec due to compiler inlining x as
+ * a constant
+ */
+static void __always_inline f300(int x)
+{
+	STAP_PROBE1(test, usdt_300, x);
+}
+
+__weak void trigger_300_usdts(void)
+{
+	R100(f300, 0);	/* f300(0) ... f300(99) */
+	R100(f300, 100);	/* f300(100) ... f300(199) */
+	R100(f300, 200);	/* f300(200) ... f300(299) */
+}
+
+static void __always_inline f400(int x __attribute__((unused)))
+{
+	STAP_PROBE1(test, usdt_400, 400);	/* same constant at every site; x is ignored */
+}
+
+/* this time we have 400 different USDT call sites, but they have uniform
+ * argument location, so libbpf's spec string deduplication logic should keep
+ * the number of specs in use very small and so we should be able to attach to
+ * all 400 call sites
+ */
+__weak void trigger_400_usdts(void)
+{
+	R100(f400, 0);
+	R100(f400, 100);
+	R100(f400, 200);
+	R100(f400, 300);
+}
+
+static void subtest_multispec_usdt(void)
+{
+	LIBBPF_OPTS(bpf_usdt_opts, opts);
+	struct test_usdt *skel;
+	struct test_usdt__bss *bss;
+	int err, i;
+	/* attach the usdt_100 program to USDTs with 100, 300 and 400 inlined call sites */
+	skel = test_usdt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	bss = skel->bss;
+	bss->my_pid = getpid();
+
+	err = test_usdt__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	/* usdt_100 is auto-attached and there are 100 inlined call sites,
+	 * let's validate that all of them are properly attached to and
+	 * handled from BPF side
+	 */
+	trigger_100_usdts();
+
+	ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
+	ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");	/* 0 + 1 + ... + 99 */
+
+	/* Stress test free spec ID tracking. By default libbpf allows up to
+	 * 256 specs to be used, so if we don't return free spec IDs back
+	 * after few detachments and re-attachments we should run out of
+	 * available spec IDs.
+	 */
+	for (i = 0; i < 2; i++) {
+		bpf_link__destroy(skel->links.usdt_100);
+
+		skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+							        "/proc/self/exe",
+								"test", "usdt_100", NULL);
+		if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
+			goto cleanup;
+
+		bss->usdt_100_sum = 0;	/* only the sum is reset; the call counter keeps growing */
+		trigger_100_usdts();
+
+		ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
+		ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+	}
+
+	/* Now let's step it up and try to attach USDT that requires more than
+	 * 256 attach points with different specs for each.
+	 * Note that we need trigger_300_usdts() only to actually have 300
+	 * USDT call sites, we are not going to actually trace them.
+	 */
+	trigger_300_usdts();
+
+	/* we'll reuse usdt_100 BPF program for usdt_300 test */
+	bpf_link__destroy(skel->links.usdt_100);
+	skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
+							"test", "usdt_300", NULL);
+	err = -errno;	/* capture errno right away, before any other call can change it */
+	if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
+		goto cleanup;
+	ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");
+
+	/* let's check that there are no "dangling" BPF programs attached due
+	 * to partial success of the above test:usdt_300 attachment
+	 */
+	bss->usdt_100_called = 0;
+	bss->usdt_100_sum = 0;
+
+	f300(777); /* this is 301st instance of usdt_300 */
+
+	ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
+	ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+
+	/* This time we have USDT with 400 inlined invocations, but arg specs
+	 * should be the same across all sites, so libbpf will only need to
+	 * use one spec and thus we'll be able to attach 400 uprobes
+	 * successfully.
+	 *
+	 * Again, we are reusing usdt_100 BPF program.
+	 */
+	skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+							"/proc/self/exe",
+							"test", "usdt_400", NULL);
+	if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
+		goto cleanup;
+
+	trigger_400_usdts();
+
+	ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
+	ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum");	/* 400 calls, each passing 400 */
+
+cleanup:
+	test_usdt__destroy(skel);
+}
+
+/* Launch ./urandom_read and read back the PID it reports on its stdout.
+ * Returns the popen() stream on success, NULL on any failure.
+ */
+static FILE *urand_spawn(int *pid)
+{
+	FILE *child_out = popen("./urandom_read 1 report-pid", "r");
+
+	if (child_out && fscanf(child_out, "%d", pid) == 1)
+		return child_out;
+
+	/* no PID reported: reap the child (if any) before bailing out */
+	if (child_out)
+		pclose(child_out);
+
+	return NULL;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+	FILE *child_out = *urand_pipe;
+
+	/* drop the caller's handle so its cleanup path does not pclose() again */
+	*urand_pipe = NULL;
+
+	/* pclose() waits for the child process to exit and returns its status */
+	return pclose(child_out);
+}
+
+static void subtest_urandom_usdt(bool auto_attach)
+{
+	struct test_urandom_usdt *skel;
+	struct test_urandom_usdt__bss *bss;
+	struct bpf_link *l;
+	FILE *urand_pipe = NULL;
+	int err, urand_pid = 0;
+	/* trace USDTs in a spawned urandom_read child and in liburandom_read.so */
+	skel = test_urandom_usdt__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	urand_pipe = urand_spawn(&urand_pid);
+	if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+		goto cleanup;
+
+	bss = skel->bss;
+	bss->urand_pid = urand_pid;	/* child PID as reported by urandom_read itself */
+
+	if (auto_attach) {
+		err = test_urandom_usdt__attach(skel);
+		if (!ASSERT_OK(err, "skel_auto_attach"))
+			goto cleanup;
+	} else {	/* attach all four programs by hand, scoped to the child's PID */
+		l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
+					     urand_pid, "./urandom_read",
+					     "urand", "read_without_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
+			goto cleanup;
+		skel->links.urand_read_without_sema = l;
+
+		l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
+					     urand_pid, "./urandom_read",
+					     "urand", "read_with_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
+			goto cleanup;
+		skel->links.urand_read_with_sema = l;
+
+		l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
+					     urand_pid, "./liburandom_read.so",
+					     "urandlib", "read_without_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
+			goto cleanup;
+		skel->links.urandlib_read_without_sema = l;
+
+		l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
+					     urand_pid, "./liburandom_read.so",
+					     "urandlib", "read_with_sema", NULL);
+		if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
+			goto cleanup;
+		skel->links.urandlib_read_with_sema = l;
+
+	}
+
+	/* trigger urandom_read USDTs; urand_trigger() waits for the child to exit */
+	ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+	ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
+	ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");
+
+	ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
+	ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");
+
+	ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
+	ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");
+
+	ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
+	ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");
+
+cleanup:
+	if (urand_pipe)	/* still set only if urand_trigger() was never reached */
+		pclose(urand_pipe);
+	test_urandom_usdt__destroy(skel);
+}
+/* test entry point: runs every USDT subtest that test__start_subtest() selects */
+void test_usdt(void)
+{
+	if (test__start_subtest("basic"))
+		subtest_basic_usdt();
+	if (test__start_subtest("multispec"))
+		subtest_multispec_usdt();
+	if (test__start_subtest("urand_auto_attach"))
+		subtest_urandom_usdt(true /* auto_attach */);
+	if (test__start_subtest("urand_pid_attach"))
+		subtest_urandom_usdt(false /* auto_attach */);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index 8cfaeba1ddbf..97ec8bc76ae6 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -16,6 +16,7 @@
 #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used
 #define btf_ptr btf_ptr___not_used
 #define BTF_F_COMPACT BTF_F_COMPACT___not_used
 #define BTF_F_NONAME BTF_F_NONAME___not_used
@@ -37,6 +38,7 @@
 #undef bpf_iter__bpf_map_elem
 #undef bpf_iter__bpf_sk_storage_map
 #undef bpf_iter__sockmap
+#undef bpf_iter__bpf_link
 #undef btf_ptr
 #undef BTF_F_COMPACT
 #undef BTF_F_NONAME
@@ -132,6 +134,11 @@ struct bpf_iter__sockmap {
 	struct sock *sk;
 };
 
+struct bpf_iter__bpf_link {
+	struct bpf_iter_meta *meta;
+	struct bpf_link *link;
+};
+
 struct btf_ptr {
 	void *ptr;
 	__u32 type_id;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
new file mode 100644
index 000000000000..e1af2f8f75a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat, Inc. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_link")
+int dump_bpf_link(struct bpf_iter__bpf_link *ctx)
+{
+	struct bpf_link *cur = ctx->link;
+	int id;
+
+	/* emit the link ID as a raw int; nothing is written for a NULL link */
+	if (cur) {
+		id = cur->id;
+		bpf_seq_write(ctx->meta->seq, &id, sizeof(id));
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
new file mode 100644
index 000000000000..3824345d82ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___diff_offs x) {}	/* presumably only here to reference the type */
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 1c7105fcae3c..4ee4748133fe 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -94,7 +94,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
 
 typedef char * (*fn_ptr_arr1_t[10])(int **);
 
-typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int));
 
 struct struct_w_typedefs {
 	int_t a;
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index c95c0cabe951..f9dc9766546e 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -785,13 +785,21 @@ struct core_reloc_bitfields___err_too_big_bitfield {
  */
 struct core_reloc_size_output {
 	int int_sz;
+	int int_off;
 	int struct_sz;
+	int struct_off;
 	int union_sz;
+	int union_off;
 	int arr_sz;
+	int arr_off;
 	int arr_elem_sz;
+	int arr_elem_off;
 	int ptr_sz;
+	int ptr_off;
 	int enum_sz;
+	int enum_off;
 	int float_sz;
+	int float_off;
 };
 
 struct core_reloc_size {
@@ -814,6 +822,16 @@ struct core_reloc_size___diff_sz {
 	double float_field;
 };
 
+struct core_reloc_size___diff_offs {
+	float float_field;
+	enum { YET_OTHER_VALUE = 123 } enum_field;
+	void *ptr_field;
+	int arr_field[4];
+	union { int x; } union_field;
+	struct { int x; } struct_field;
+	int int_field;
+};
+
 /* Error case of two candidates with the fields (int_field) at the same
  * offset, but with differing final relocation values: size 4 vs size 1
  */
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
new file mode 100644
index 000000000000..d811cff73597
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct test_info {
+	int x;
+	struct bpf_dynptr ptr;	/* dynptr embedded in a larger struct */
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct bpf_dynptr);
+} array_map1 SEC(".maps");	/* value is a bare dynptr; used by add_dynptr_to_map1 */
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct test_info);
+} array_map2 SEC(".maps");	/* value embeds a dynptr; used by add_dynptr_to_map2 */
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array_map3 SEC(".maps");	/* plain __u32 value; backs get_map_val_dynptr() */
+
+struct sample {
+	int pid;
+	long value;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int err, val;	/* val is used both as a reserve size and as a destination for test reads */
+/* Initialize *ptr as a dynptr over array_map3[0]; returns -ENOENT if the lookup fails */
+static int get_map_val_dynptr(struct bpf_dynptr *ptr)
+{
+	__u32 key = 0, *map_val;
+
+	bpf_map_update_elem(&array_map3, &key, &val, 0);	/* store the global val first */
+
+	map_val = bpf_map_lookup_elem(&array_map3, &key);
+	if (!map_val)
+		return -ENOENT;
+
+	bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr);	/* helper's result is not checked */
+
+	return 0;
+}
+
+/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding
+ * bpf_ringbuf_submit/discard_dynptr call
+ */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_missing_release1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);	/* size comes from the global val */
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+	return 0;	/* returns with ptr still reserved */
+}
+
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_missing_release2(void *ctx)
+{
+	struct bpf_dynptr ptr1, ptr2;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1);
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+	sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample));
+	if (!sample) {	/* this path releases both reservations */
+		bpf_ringbuf_discard_dynptr(&ptr1, 0);
+		bpf_ringbuf_discard_dynptr(&ptr2, 0);
+		return 0;
+	}
+
+	bpf_ringbuf_submit_dynptr(&ptr1, 0);	/* only ptr1 is released on this path */
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */
+
+	return 0;
+}
+
+static int missing_release_callback_fn(__u32 index, void *data)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	/* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+	return 0;	/* callback returns with ptr still reserved */
+}
+
+/* Any dynptr reserved within a callback must also be released (submitted or discarded) */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_missing_release_callback(void *ctx)
+{
+	bpf_loop(10, missing_release_callback_fn, NULL, 0);	/* the callback never releases */
+	return 0;
+}
+
+/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_release_uninit_dynptr(void *ctx)
+{
+	struct bpf_dynptr ptr;	/* deliberately never initialized */
+
+	/* this should fail */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* A dynptr can't be used after it has been invalidated */
+SEC("?raw_tp/sys_nanosleep")
+int use_after_invalid(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
+
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);	/* read before the release */
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);	/* the release is what invalidates ptr */
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	return 0;
+}
+
+/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
+SEC("?raw_tp/sys_nanosleep")
+int ringbuf_invalid_api(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+	sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));	/* slice spanning the whole sample */
+	if (!sample)
+		goto done;
+
+	sample->pid = 123;
+
+	/* invalid API use. need to use dynptr API to submit/discard */
+	bpf_ringbuf_submit(sample, 0);
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);	/* reached on both paths */
+	return 0;
+}
+
+/* Can't add a dynptr to a map */
+SEC("?raw_tp/sys_nanosleep")
+int add_dynptr_to_map1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	int key = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	/* this should fail: the dynptr itself is passed as the map value */
+	bpf_map_update_elem(&array_map1, &key, &ptr, 0);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* Can't add a struct with an embedded dynptr to a map */
+SEC("?raw_tp/sys_nanosleep")
+int add_dynptr_to_map2(void *ctx)
+{
+	struct test_info x;
+	int key = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr);
+
+	/* this should fail: x carries the dynptr in its ptr member */
+	bpf_map_update_elem(&array_map2, &key, &x, 0);
+
+	bpf_ringbuf_submit_dynptr(&x.ptr, 0);
+
+	return 0;
+}
+
+/* A data slice can't be accessed out of bounds */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_out_of_bounds_ringbuf(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	void *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+	data  = bpf_dynptr_data(&ptr, 0, 8);	/* 8-byte slice: valid offsets are 0..7 */
+	if (!data)
+		goto done;
+
+	/* can't index out of bounds of the data slice */
+	val = *((char *)data + 8);
+
+done:
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+	return 0;
+}
+
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_out_of_bounds_map_value(void *ctx)
+{
+	__u32 key = 0, map_val;	/* key is unused; map_val only supplies sizeof() */
+	struct bpf_dynptr ptr;
+	void *data;
+
+	get_map_val_dynptr(&ptr);	/* return value is ignored */
+
+	data  = bpf_dynptr_data(&ptr, 0, sizeof(map_val));
+	if (!data)
+		return 0;
+
+	/* can't index out of bounds of the data slice */
+	val = *((char *)data + (sizeof(map_val) + 1));
+
+	return 0;
+}
+
+/* A data slice can't be used after it has been released */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_use_after_release(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+	sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+	if (!sample)
+		goto done;
+
+	sample->pid = 123;	/* write while the slice is still live */
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);	/* releases the memory sample points into */
+
+	/* this should fail */
+	val = sample->pid;
+
+	return 0;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* A data slice must be first checked for NULL */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_missing_null_check1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	void *data;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+	data  = bpf_dynptr_data(&ptr, 0, 8);
+
+	/* missing if (!data) check */
+
+	/* this should fail: data is dereferenced without the NULL check */
+	*(__u8 *)data = 3;
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* A NULL check on one data slice must not validate a different, unchecked slice */
+SEC("?raw_tp/sys_nanosleep")
+int data_slice_missing_null_check2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	__u64 *data1, *data2;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+	data1 = bpf_dynptr_data(&ptr, 0, 8);
+	data2 = bpf_dynptr_data(&ptr, 0, 8);	/* same offset and length as data1 */
+	if (data1)
+		/* this should fail */
+		*data2 = 3;
+
+done:	/* NOTE(review): no goto in this function targets this label */
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
+
+/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a
+ * dynptr argument
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_helper1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	get_map_val_dynptr(&ptr);	/* return value is ignored */
+
+	/* this should fail: the dynptr's own bytes are handed over as a string buffer */
+	bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!");
+
+	return 0;
+}
+
+/* A dynptr can't be passed into a helper function at a non-zero offset */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_helper2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	get_map_val_dynptr(&ptr);
+
+	/* this should fail: the dynptr argument is &ptr advanced by 8 bytes */
+	bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0);
+
+	return 0;
+}
+
+/* A bpf_dynptr is invalidated if it's been written into */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	void *data;	/* assigned below but never read */
+	__u8 x = 0;
+
+	get_map_val_dynptr(&ptr);
+
+	memcpy(&ptr, &x, sizeof(x));	/* overwrites the first byte of the dynptr itself */
+
+	/* this should fail */
+	data = bpf_dynptr_data(&ptr, 0, 1);
+
+	return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed
+ * offset
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+	__u8 x = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	memcpy((void *)&ptr + 8, &x, sizeof(x));	/* one byte, 8 bytes into the dynptr */
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a
+ * non-const offset
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write3(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char stack_buf[16];
+	unsigned long len;
+	__u8 x = 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+	memcpy(stack_buf, &val, sizeof(val));
+	len = stack_buf[0] & 0xf;	/* 0..15, derived from the global val */
+
+	memcpy((void *)&ptr + len, &x, sizeof(x));	/* one byte at a run-time offset from &ptr */
+
+	/* this should fail */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+static int invalid_write4_callback(__u32 index, void *data)
+{
+	*(__u32 *)data = 123;	/* data is the &ptr that invalid_write4 passes to bpf_loop() */
+
+	return 0;
+}
+
+/* If the dynptr is written into in a callback function, it should
+ * be invalidated as a dynptr
+ */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_write4(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	bpf_loop(10, invalid_write4_callback, &ptr, 0);	/* callback stores through &ptr */
+
+	/* this should fail */
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* A globally-defined bpf_dynptr can't be used (it must reside on the stack) */
+struct bpf_dynptr global_dynptr;
+SEC("?raw_tp/sys_nanosleep")
+int global(void *ctx)
+{
+	/* this should fail */
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr);
+
+	bpf_ringbuf_discard_dynptr(&global_dynptr, 0);
+
+	return 0;
+}
+
+/* A direct read should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read1(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	/* this should fail: loads an int straight out of the dynptr's own storage */
+	val = *(int *)&ptr;
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* A read through a dynptr pointer that is offset by one byte should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read2(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	char read_data[64];
+
+	get_map_val_dynptr(&ptr);
+
+	/* this should fail */
+	bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0);
+
+	return 0;
+}
+
+/* A direct read at an offset into the lower stack slot should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read3(void *ctx)
+{
+	struct bpf_dynptr ptr1, ptr2;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1);
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2);
+
+	/* this should fail: copies sizeof(val) bytes starting 8 bytes into ptr1 */
+	memcpy(&val, (void *)&ptr1 + 8, sizeof(val));
+
+	bpf_ringbuf_discard_dynptr(&ptr1, 0);
+	bpf_ringbuf_discard_dynptr(&ptr2, 0);
+
+	return 0;
+}
+
+static int invalid_read4_callback(__u32 index, void *data)
+{
+	/* this should fail: data is the &ptr that invalid_read4 passes to bpf_loop() */
+	val = *(__u32 *)data;
+
+	return 0;
+}
+
+/* A direct read within a callback function should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_read4(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+	bpf_loop(10, invalid_read4_callback, &ptr, 0);	/* callback reads through &ptr */
+
+	bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* Initializing a dynptr on an offset should fail */
+SEC("?raw_tp/sys_nanosleep")
+int invalid_offset(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	/* this should fail: &ptr + 1 points one whole struct bpf_dynptr past ptr */
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1);
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+/* Can't release a dynptr twice */
+SEC("?raw_tp/sys_nanosleep")
+int release_twice(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);	/* first, legitimate release */
+
+	/* this second release should fail */
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+	return 0;
+}
+
+static int release_twice_callback_fn(__u32 index, void *data)
+{
+	/* this should fail: release_twice_callback() has already discarded this dynptr */
+	bpf_ringbuf_discard_dynptr(data, 0);
+
+	return 0;
+}
+
+/* Test that releasing a dynptr twice, where one of the releases happens
+ * within a callback function, fails
+ */
+SEC("?raw_tp/sys_nanosleep")
+int release_twice_callback(void *ctx)
+{
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr);
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0);	/* first release, outside the callback */
+
+	bpf_loop(10, release_twice_callback_fn, &ptr, 0);	/* callback discards &ptr again */
+
+	return 0;
+}
+
+/* Reject unsupported local mem types for dynptr_from_mem API */
+SEC("?raw_tp/sys_nanosleep")
+int dynptr_from_mem_invalid_api(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	int x = 0;
+
+	/* this should fail: x is a local variable of this program, not map memory */
+	bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
new file mode 100644
index 000000000000..d67be48df4b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid, err, val;
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_read_write(void *ctx)
+{
+	char write_data[64] = "hello there, world!!";
+	char read_data[64] = {};
+	struct bpf_dynptr ptr;
+	int i;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid) /* only act for the pid under test */
+		return 0;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
+
+	/* Write data into the dynptr (skipped if err is already non-zero) */
+	err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data));
+
+	/* Read the data that was written into the dynptr (same err chaining) */
+	err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+
+	/* Ensure the data we read matches the data we wrote; err = 1 on mismatch */
+	for (i = 0; i < sizeof(read_data); i++) {
+		if (read_data[i] != write_data[i]) {
+			err = 1;
+			break;
+		}
+	}
+
+	bpf_ringbuf_discard_dynptr(&ptr, 0); /* drop the reservation; nothing is submitted */
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_data_slice(void *ctx)
+{
+	__u32 key = 0, val = 235, *map_val; /* local val shadows the file-scope val */
+	struct bpf_dynptr ptr;
+	__u32 map_val_size;
+	void *data;
+
+	map_val_size = sizeof(*map_val);
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	bpf_map_update_elem(&array_map, &key, &val, 0);
+
+	map_val = bpf_map_lookup_elem(&array_map, &key);
+	if (!map_val) {
+		err = 1;
+		return 0;
+	}
+
+	bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr);
+
+	/* Try getting a data slice whose offset is past the end of the value */
+	data = bpf_dynptr_data(&ptr, map_val_size + 1, 1);
+	if (data) {
+		err = 2;
+		return 0;
+	}
+
+	/* Try getting more bytes than available, starting at offset 0 */
+	data = bpf_dynptr_data(&ptr, 0, map_val_size + 1);
+	if (data) {
+		err = 3;
+		return 0;
+	}
+
+	data = bpf_dynptr_data(&ptr, 0, sizeof(__u32)); /* in-range slice: must succeed */
+	if (!data) {
+		err = 4;
+		return 0;
+	}
+
+	*(__u32 *)data = 999; /* write through the slice */
+
+	err = bpf_probe_read_kernel(&val, sizeof(val), data); /* read it back via helper */
+	if (err)
+		return 0;
+
+	if (val != *(int *)data) /* helper read and direct read must agree */
+		err = 5;
+
+	return 0;
+}
+
+static int ringbuf_callback(__u32 index, void *data)
+{
+	struct sample *sample;
+
+	struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
+
+	sample = bpf_dynptr_data(ptr, 0, sizeof(*sample));
+	if (!sample)
+		err = 2;
+	else
+		sample->pid += index;
+
+	return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_ringbuf(void *ctx)
+{
+	struct bpf_dynptr ptr;
+	struct sample *sample;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	val = 100;
+
+	/* check that a reservation sized by a runtime value (val) succeeds */
+	err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+	sample = err ? NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+	if (!sample) {
+		err = 1;
+		goto done;
+	}
+
+	sample->pid = 10;
+
+	/* Can pass dynptr to callback functions; each call adds its index to pid */
+	bpf_loop(10, ringbuf_callback, &ptr, 0);
+
+	if (sample->pid != 55) /* 10 + (0 + 1 + ... + 9), assuming indices 0..9 */
+		err = 2;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
index f5ca142abf8f..20d009e2d266 100644
--- a/tools/testing/selftests/bpf/progs/exhandler_kern.c
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -37,7 +37,16 @@ int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags)
 	 */
 	work = task->task_works;
 	func = work->func;
-	if (!work && !func)
-		exception_triggered++;
+	/* Currently verifier will fail for `btf_ptr |= btf_ptr` instruction.
+	 * To work around the issue, use barrier_var() and rewrite as below to
+	 * prevent compiler from generating verifier-unfriendly code.
+	 */
+	barrier_var(work);
+	if (work)
+		return 0;
+	barrier_var(func);
+	if (func)
+		return 0;
+	exception_triggered++;
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
new file mode 100644
index 000000000000..8e545865ea33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array_map SEC(".maps");
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+		 void *data)
+{
+	bpf_get_current_comm(key, sizeof(*key));
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int test_map_key_write(const void *ctx)
+{
+	bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 600be50800f8..93510f4f0f3a 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -98,3 +98,17 @@ int test_kretprobe(struct pt_regs *ctx)
 	kprobe_multi_check(ctx, true);
 	return 0;
 }
+
+SEC("kprobe.multi")
+int test_kprobe_manual(struct pt_regs *ctx)
+{
+	kprobe_multi_check(ctx, false);
+	return 0;
+}
+
+SEC("kretprobe.multi")
+int test_kretprobe_manual(struct pt_regs *ctx)
+{
+	kprobe_multi_check(ctx, true);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
new file mode 100644
index 000000000000..e76e499aca39
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi/")
+int test_kprobe_empty(struct pt_regs *ctx)
+{
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
index b964ec1390c2..b05571bc67d5 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs1.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -4,6 +4,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
 
 /* weak and shared between two files */
 const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx1(__u64 *ctx)
 /* this weak instance should win because it's the first one */
 __weak int set_output_weak(int x)
 {
+	static volatile int whatever;
+
+	/* make sure we use CO-RE relocations in a weak function, this used to
+	 * cause problems for BPF static linker
+	 */
+	whatever = bpf_core_type_size(struct task_struct);
+
 	output_weak1 = x;
 	return x;
 }
@@ -53,12 +61,17 @@ extern int set_output_val2(int x);
 /* here we'll force set_output_ctx2() to be __hidden in the final obj file */
 __hidden extern void set_output_ctx2(__u64 *ctx);
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int BPF_PROG(handler1, struct pt_regs *regs, long id)
 {
+	static volatile int whatever;
+
 	if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
 		return 0;
 
+	/* make sure we have CO-RE relocations in main program */
+	whatever = bpf_core_type_size(struct task_struct);
+
 	set_output_val2(1000);
 	set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
 
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
index 575e958e60b7..ee7e3848ee4f 100644
--- a/tools/testing/selftests/bpf/progs/linked_funcs2.c
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -4,6 +4,7 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
 
 /* weak and shared between both files */
 const volatile int my_tid __weak;
@@ -44,6 +45,13 @@ void set_output_ctx2(__u64 *ctx)
 /* this weak instance should lose, because it will be processed second */
 __weak int set_output_weak(int x)
 {
+	static volatile int whatever;
+
+	/* make sure we use CO-RE relocations in a weak function, this used to
+	 * cause problems for BPF static linker
+	 */
+	whatever = 2 * bpf_core_type_size(struct task_struct);
+
 	output_weak2 = x;
 	return 2 * x;
 }
@@ -53,12 +61,17 @@ extern int set_output_val1(int x);
 /* here we'll force set_output_ctx1() to be __hidden in the final obj file */
 __hidden extern void set_output_ctx1(__u64 *ctx);
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int BPF_PROG(handler2, struct pt_regs *regs, long id)
 {
+	static volatile int whatever;
+
 	if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
 		return 0;
 
+	/* make sure we have CO-RE relocations in main program */
+	whatever = bpf_core_type_size(struct task_struct);
+
 	set_output_val1(2000);
 	set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
 
diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c
index 913791923fa3..1b13f37f85ec 100644
--- a/tools/testing/selftests/bpf/progs/loop5.c
+++ b/tools/testing/selftests/bpf/progs/loop5.c
@@ -2,7 +2,6 @@
 // Copyright (c) 2019 Facebook
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
-#define barrier() __asm__ __volatile__("": : :"memory")
 
 char _license[] SEC("license") = "GPL";
 
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
new file mode 100644
index 000000000000..eb8217803493
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_value {
+	struct prog_test_ref_kfunc __kptr *unref_ptr;
+	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+};
+
+struct array_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct hash_map {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} hash_map SEC(".maps");
+
+struct hash_malloc_map {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_malloc_map SEC(".maps");
+
+struct lru_hash_map {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} lru_hash_map SEC(".maps");
+
+#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name)       \
+	struct {                                                \
+		__uint(type, map_type);                         \
+		__uint(max_entries, 1);                         \
+		__uint(key_size, sizeof(int));                  \
+		__uint(value_size, sizeof(int));                \
+		__array(values, struct inner_map_type);         \
+	} name SEC(".maps") = {                                 \
+		.values = { [0] = &inner_map_type },            \
+	}
+
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+
+static void test_kptr_unref(struct map_value *v)
+{
+	struct prog_test_ref_kfunc *p;
+
+	p = v->unref_ptr;
+	/* store untrusted_ptr_or_null_ */
+	v->unref_ptr = p;
+	if (!p)
+		return;
+	if (p->a + p->b > 100)
+		return;
+	/* store untrusted_ptr_ */
+	v->unref_ptr = p;
+	/* store NULL */
+	v->unref_ptr = NULL;
+}
+
+static void test_kptr_ref(struct map_value *v)
+{
+	struct prog_test_ref_kfunc *p;
+
+	p = v->ref_ptr;
+	/* store ptr_or_null_ */
+	v->unref_ptr = p;
+	if (!p)
+		return;
+	if (p->a + p->b > 100)
+		return;
+	/* store NULL */
+	p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+	if (!p)
+		return;
+	if (p->a + p->b > 100) {
+		bpf_kfunc_call_test_release(p);
+		return;
+	}
+	/* store ptr_ */
+	v->unref_ptr = p;
+	bpf_kfunc_call_test_release(p);
+
+	p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!p)
+		return;
+	/* store ptr_ */
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (!p)
+		return;
+	if (p->a + p->b > 100) {
+		bpf_kfunc_call_test_release(p);
+		return;
+	}
+	bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr_get(struct map_value *v)
+{
+	struct prog_test_ref_kfunc *p;
+
+	p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+	if (!p)
+		return;
+	if (p->a + p->b > 100) {
+		bpf_kfunc_call_test_release(p);
+		return;
+	}
+	bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr(struct map_value *v)
+{
+	test_kptr_unref(v);
+	test_kptr_ref(v);
+	test_kptr_get(v);
+}
+
+SEC("tc")
+int test_map_kptr(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+#define TEST(map)					\
+	v = bpf_map_lookup_elem(&map, &key);		\
+	if (!v)						\
+		return 0;				\
+	test_kptr(v)
+
+	TEST(array_map);
+	TEST(hash_map);
+	TEST(hash_malloc_map);
+	TEST(lru_hash_map);
+
+#undef TEST
+	return 0;
+}
+
+SEC("tc")
+int test_map_in_map_kptr(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+	void *map;
+
+#define TEST(map_in_map)                                \
+	map = bpf_map_lookup_elem(&map_in_map, &key);   \
+	if (!map)                                       \
+		return 0;                               \
+	v = bpf_map_lookup_elem(map, &key);		\
+	if (!v)						\
+		return 0;				\
+	test_kptr(v)
+
+	TEST(array_of_array_maps);
+	TEST(array_of_hash_maps);
+	TEST(array_of_hash_malloc_maps);
+	TEST(array_of_lru_hash_maps);
+	TEST(hash_of_array_maps);
+	TEST(hash_of_hash_maps);
+	TEST(hash_of_hash_malloc_maps);
+	TEST(hash_of_lru_hash_maps);
+
+#undef TEST
+	return 0;
+}
+
+SEC("tc")
+int test_map_kptr_ref(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p, *p_st;
+	unsigned long arg = 0;
+	struct map_value *v;
+	int key = 0, ret;
+
+	p = bpf_kfunc_call_test_acquire(&arg);
+	if (!p)
+		return 1;
+
+	p_st = p->next;
+	if (p_st->cnt.refs.counter != 2) {
+		ret = 2;
+		goto end;
+	}
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v) {
+		ret = 3;
+		goto end;
+	}
+
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (p) {
+		ret = 4;
+		goto end;
+	}
+	if (p_st->cnt.refs.counter != 2)
+		return 5;
+
+	p = bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+	if (!p)
+		return 6;
+	if (p_st->cnt.refs.counter != 3) {
+		ret = 7;
+		goto end;
+	}
+	bpf_kfunc_call_test_release(p);
+	if (p_st->cnt.refs.counter != 2)
+		return 8;
+
+	p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+	if (!p)
+		return 9;
+	bpf_kfunc_call_test_release(p);
+	if (p_st->cnt.refs.counter != 1)
+		return 10;
+
+	p = bpf_kfunc_call_test_acquire(&arg);
+	if (!p)
+		return 11;
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (p) {
+		ret = 12;
+		goto end;
+	}
+	if (p_st->cnt.refs.counter != 2)
+		return 13;
+	/* Leave in map */
+
+	return 0;
+end:
+	bpf_kfunc_call_test_release(p);
+	return ret;
+}
+
+SEC("tc")
+int test_map_kptr_ref2(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p, *p_st;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 1;
+
+	p_st = v->ref_ptr;
+	if (!p_st || p_st->cnt.refs.counter != 2)
+		return 2;
+
+	p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+	if (!p)
+		return 3;
+	if (p_st->cnt.refs.counter != 2) {
+		bpf_kfunc_call_test_release(p);
+		return 4;
+	}
+
+	p = bpf_kptr_xchg(&v->ref_ptr, p);
+	if (p) {
+		bpf_kfunc_call_test_release(p);
+		return 5;
+	}
+	if (p_st->cnt.refs.counter != 2)
+		return 6;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
new file mode 100644
index 000000000000..05e209b1b12a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct map_value {
+	char buf[8];
+	struct prog_test_ref_kfunc __kptr *unref_ptr;
+	struct prog_test_ref_kfunc __kptr_ref *ref_ptr;
+	struct prog_test_member __kptr_ref *ref_memb_ptr;
+};
+
+struct array_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} array_map SEC(".maps");
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **p, int a, int b) __ksym;
+
+SEC("?tc")
+int size_not_bpf_dw(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	*(u32 *)&v->unref_ptr = 0;
+	return 0;
+}
+
+SEC("?tc")
+int non_const_var_off(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0, id;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	id = ctx->protocol;
+	if (id < 4 || id > 12)
+		return 0;
+	*(u64 *)((void *)v + id) = 0;
+
+	return 0;
+}
+
+SEC("?tc")
+int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0, id;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	id = ctx->protocol;
+	if (id < 4 || id > 12)
+		return 0;
+	bpf_kptr_xchg((void *)v + id, NULL);
+
+	return 0;
+}
+
+SEC("?tc")
+int misaligned_access_write(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	*(void **)((void *)v + 7) = NULL;
+
+	return 0;
+}
+
+SEC("?tc")
+int misaligned_access_read(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	return *(u64 *)((void *)v + 1);
+}
+
+SEC("?tc")
+int reject_var_off_store(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *unref_ptr;
+	struct map_value *v;
+	int key = 0, id;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	unref_ptr = v->unref_ptr;
+	if (!unref_ptr)
+		return 0;
+	id = ctx->protocol;
+	if (id < 4 || id > 12)
+		return 0;
+	unref_ptr += id;
+	v->unref_ptr = unref_ptr;
+
+	return 0;
+}
+
+SEC("?tc")
+int reject_bad_type_match(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *unref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	unref_ptr = v->unref_ptr;
+	if (!unref_ptr)
+		return 0;
+	unref_ptr = (void *)unref_ptr + 4;
+	v->unref_ptr = unref_ptr;
+
+	return 0;
+}
+
+SEC("?tc")
+int marked_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_this_cpu_ptr(v->unref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int correct_btf_id_check_size(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = v->unref_ptr;
+	if (!p)
+		return 0;
+	return *(int *)((void *)p + bpf_core_type_size(struct prog_test_ref_kfunc));
+}
+
+SEC("?tc")
+int inherit_untrusted_on_walk(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *unref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	unref_ptr = v->unref_ptr;
+	if (!unref_ptr)
+		return 0;
+	unref_ptr = unref_ptr->next;
+	bpf_this_cpu_ptr(unref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kptr_xchg(&v->unref_ptr, NULL);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_no_map_val(struct __sk_buff *ctx)
+{
+	bpf_kfunc_call_test_kptr_get((void *)&ctx, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_no_null_map_val(struct __sk_buff *ctx)
+{
+	bpf_kfunc_call_test_kptr_get(bpf_map_lookup_elem(&array_map, &(int){0}), 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_no_kptr(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get((void *)v, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_on_unref(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get(&v->unref_ptr, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int reject_kptr_get_bad_type_match(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get((void *)&v->ref_memb_ptr, 0, 0);
+	return 0;
+}
+
+SEC("?tc")
+int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_this_cpu_ptr(v->ref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = v->ref_ptr;
+	if (!p)
+		return 0;
+	/* Checkmate, clang: volatile cast presumably keeps this redundant store */
+	*(struct prog_test_ref_kfunc * volatile *)&v->ref_ptr = p;
+	return 0;
+}
+
+SEC("?tc")
+int reject_untrusted_xchg(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = v->ref_ptr;
+	if (!p)
+		return 0;
+	bpf_kptr_xchg(&v->ref_ptr, p);
+	return 0;
+}
+
+SEC("?tc")
+int reject_bad_type_xchg(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *ref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!ref_ptr)
+		return 0;
+	bpf_kptr_xchg(&v->ref_memb_ptr, ref_ptr);
+	return 0;
+}
+
+SEC("?tc")
+int reject_member_of_ref_xchg(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *ref_ptr;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!ref_ptr)
+		return 0;
+	bpf_kptr_xchg(&v->ref_memb_ptr, &ref_ptr->memb);
+	return 0;
+}
+
+SEC("?syscall")
+int reject_indirect_helper_access(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_get_current_comm(v, sizeof(v->buf) + 1);
+	return 0;
+}
+
+__noinline
+int write_func(int *p)
+{
+	return p ? *p = 42 : 0;
+}
+
+SEC("?tc")
+int reject_indirect_global_func_access(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	return write_func((void *)v + 5);
+}
+
+SEC("?tc")
+int kptr_xchg_ref_state(struct __sk_buff *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+	if (!p)
+		return 0;
+	bpf_kptr_xchg(&v->ref_ptr, p);
+	return 0;
+}
+
+SEC("?tc")
+int kptr_get_ref_state(struct __sk_buff *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&array_map, &key);
+	if (!v)
+		return 0;
+
+	bpf_kfunc_call_test_kptr_get(&v->ref_ptr, 0, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
new file mode 100644
index 000000000000..91a0d7eff2ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 token = 0;
+
+struct mptcp_storage {
+	__u32 invoked;
+	__u32 is_mptcp;
+	struct sock *sk;
+	__u32 token;
+	struct sock *first;
+	char ca_name[TCP_CA_NAME_MAX];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct mptcp_storage);
+} socket_storage_map SEC(".maps");
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+	struct mptcp_storage *storage;
+	struct mptcp_sock *msk;
+	int op = (int)ctx->op;
+	struct tcp_sock *tsk;
+	struct bpf_sock *sk;
+	bool is_mptcp;
+
+	if (op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+		return 1;
+
+	sk = ctx->sk;
+	if (!sk)
+		return 1;
+
+	tsk = bpf_skc_to_tcp_sock(sk);
+	if (!tsk)
+		return 1;
+
+	is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? tsk->is_mptcp : 0;
+	if (!is_mptcp) {
+		storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+					     BPF_SK_STORAGE_GET_F_CREATE);
+		if (!storage)
+			return 1;
+
+		storage->token = 0;
+		__builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX);
+		storage->first = NULL;
+	} else {
+		msk = bpf_skc_to_mptcp_sock(sk);
+		if (!msk)
+			return 1;
+
+		storage = bpf_sk_storage_get(&socket_storage_map, msk, 0,
+					     BPF_SK_STORAGE_GET_F_CREATE);
+		if (!storage)
+			return 1;
+
+		storage->token = msk->token;
+		__builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX);
+		storage->first = msk->first;
+	}
+	storage->invoked++;
+	storage->is_mptcp = is_mptcp;
+	storage->sk = (struct sock *)sk;
+
+	return 1;
+}
+
+SEC("fentry/mptcp_pm_new_connection")
+int BPF_PROG(trace_mptcp_pm_new_connection, struct mptcp_sock *msk,
+	     const struct sock *ssk, int server_side)
+{
+	if (!server_side)
+		token = msk->token;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
index b3fcb5274ee0..f793280a3238 100644
--- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -35,10 +35,10 @@ int oncpu(void *ctx)
 	long val;
 
 	val = bpf_get_stackid(ctx, &stackmap, 0);
-	if (val > 0)
+	if (val >= 0)
 		stackid_kernel = 2;
 	val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
-	if (val > 0)
+	if (val >= 0)
 		stackid_user = 2;
 
 	trace = bpf_map_lookup_elem(&stackdata_map, &key);
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 4896fdf816f7..92331053dba3 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -826,8 +826,9 @@ out:
 
 SEC("kprobe/vfs_link")
 int BPF_KPROBE(kprobe__vfs_link,
-	       struct dentry* old_dentry, struct inode* dir,
-	       struct dentry* new_dentry, struct inode** delegated_inode)
+	       struct dentry* old_dentry, struct user_namespace *mnt_userns,
+	       struct inode* dir, struct dentry* new_dentry,
+	       struct inode** delegated_inode)
 {
 	struct bpf_func_stats_ctx stats_ctx;
 	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
index 4df9088bfc00..fb6b13522949 100644
--- a/tools/testing/selftests/bpf/progs/profiler1.c
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -1,6 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
 #define UNROLL
 #define INLINE __always_inline
 #include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index 1ed28882daf3..6c7b1fb268d6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -171,8 +171,6 @@ struct process_frame_ctx {
 	bool done;
 };
 
-#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
-
 static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
 {
 	int zero = 0;
@@ -299,7 +297,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
 #ifdef NO_UNROLL
 #pragma clang loop unroll(disable)
 #else
+#ifdef UNROLL_COUNT
+#pragma clang loop unroll_count(UNROLL_COUNT)
+#else
 #pragma clang loop unroll(full)
+#endif
 #endif /* NO_UNROLL */
 		/* Unwind python stack */
 		for (int i = 0; i < STACK_MAX_LEN; ++i) {
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
index cb49b89e37cd..ce1aa5189cc4 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -1,9 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2019 Facebook
 #define STACK_MAX_LEN 600
-/* clang will not unroll the loop 600 times.
- * Instead it will unroll it to the amount it deemed
- * appropriate, but the loop will still execute 600 times.
- * Total program size is around 90k insns
+/* A full unroll of 600 iterations yields a total
+ * program size close to 298k insns, which may push
+ * BPF_JMP insn offsets out of the 16-bit integer range.
+ * So limit the unroll count to 150: the total
+ * program size is then around 80k insns, but
+ * the loop will still execute 600 times.
  */
+#define UNROLL_COUNT 150
 #include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
new file mode 100644
index 000000000000..e4252fd973be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 load_offset = 0;
+int test_result = 0;
+
+SEC("tc")
+int skb_process(struct __sk_buff *skb)
+{
+	char buf[16];
+
+	test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c
index 900d930d48a8..769668feed48 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_test.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_test.c
@@ -19,7 +19,7 @@ unsigned int no_const_str_size = STRNCMP_STR_SZ;
 
 char _license[] SEC("license") = "GPL";
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int do_strncmp(void *ctx)
 {
 	if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
@@ -29,7 +29,7 @@ int do_strncmp(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_not_const_str_size(void *ctx)
 {
 	/* The value of string size is not const, so will fail */
@@ -37,7 +37,7 @@ int strncmp_bad_not_const_str_size(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_writable_target(void *ctx)
 {
 	/* Compared target is not read-only, so will fail */
@@ -45,7 +45,7 @@ int strncmp_bad_writable_target(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int strncmp_bad_not_null_term_target(void *ctx)
 {
 	/* Compared target is not null-terminated, so will fail */
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 8056a4c6d918..ce9acf4db8d2 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -5,38 +5,92 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
 
 int kprobe_res = 0;
+int kprobe2_res = 0;
 int kretprobe_res = 0;
+int kretprobe2_res = 0;
 int uprobe_res = 0;
 int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
 
-SEC("kprobe/sys_nanosleep")
+SEC("kprobe")
 int handle_kprobe(struct pt_regs *ctx)
 {
 	kprobe_res = 1;
 	return 0;
 }
 
-SEC("kretprobe/sys_nanosleep")
-int BPF_KRETPROBE(handle_kretprobe)
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int BPF_KPROBE(handle_kprobe_auto)
+{
+	kprobe2_res = 11;
+	return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
 {
 	kretprobe_res = 2;
 	return 0;
 }
 
-SEC("uprobe/trigger_func")
+SEC("kretprobe/" SYS_PREFIX "sys_nanosleep")
+int BPF_KRETPROBE(handle_kretprobe_auto)
+{
+	kretprobe2_res = 22;
+	return 0;
+}
+
+SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
 	uprobe_res = 3;
 	return 0;
 }
 
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
 int handle_uretprobe(struct pt_regs *ctx)
 {
 	uretprobe_res = 4;
 	return 0;
 }
 
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+	uprobe_byname_res = 5;
+	return 0;
+}
+
+/* use auto-attach format for section definition. */
+SEC("uretprobe//proc/self/exe:trigger_func2")
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+	uretprobe_byname_res = 6;
+	return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+	unsigned int size = PT_REGS_PARM1(ctx);
+
+	/* verify malloc size */
+	if (size == 1)
+		uprobe_byname2_res = 7;
+	return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+	uretprobe_byname2_res = 8;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 2d3a7710e2ce..22d0ac8709b4 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -4,18 +4,23 @@
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <errno.h>
 
 int my_tid;
 
-int kprobe_res;
-int kprobe_multi_res;
-int kretprobe_res;
-int uprobe_res;
-int uretprobe_res;
-int tp_res;
-int pe_res;
+__u64 kprobe_res;
+__u64 kprobe_multi_res;
+__u64 kretprobe_res;
+__u64 uprobe_res;
+__u64 uretprobe_res;
+__u64 tp_res;
+__u64 pe_res;
+__u64 fentry_res;
+__u64 fexit_res;
+__u64 fmod_ret_res;
+__u64 lsm_res;
 
-static void update(void *ctx, int *res)
+static void update(void *ctx, __u64 *res)
 {
 	if (my_tid != (u32)bpf_get_current_pid_tgid())
 		return;
@@ -37,14 +42,14 @@ int handle_kretprobe(struct pt_regs *ctx)
 	return 0;
 }
 
-SEC("uprobe/trigger_func")
+SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
 	update(ctx, &uprobe_res);
 	return 0;
 }
 
-SEC("uretprobe/trigger_func")
+SEC("uretprobe")
 int handle_uretprobe(struct pt_regs *ctx)
 {
 	update(ctx, &uretprobe_res);
@@ -82,4 +87,35 @@ int handle_pe(struct pt_regs *ctx)
 	return 0;
 }
 
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_test1, int a)
+{
+	update(ctx, &fentry_res);
+	return 0;
+}
+
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(fexit_test1, int a, int ret)
+{
+	update(ctx, &fexit_res);
+	return 0;
+}
+
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+	update(ctx, &fmod_ret_res);
+	return 1234;
+}
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+	     unsigned long reqprot, unsigned long prot, int ret)
+{
+	if (my_tid != (u32)bpf_get_current_pid_tgid())
+		return ret;	/* non-matching thread: hand back the incoming ret unchanged */
+	update(ctx, &lsm_res);
+	return -EPERM;	/* matching thread: record via update() and return -EPERM */
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
index 7e45e2bdf6cd..5b8a75097ea3 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
@@ -45,35 +45,34 @@ int test_core_existence(void *ctx)
 	struct core_reloc_existence_output *out = (void *)&data.out;
 
 	out->a_exists = bpf_core_field_exists(in->a);
-	if (bpf_core_field_exists(in->a))
+	if (bpf_core_field_exists(struct core_reloc_existence, a))
 		out->a_value = BPF_CORE_READ(in, a);
 	else
 		out->a_value = 0xff000001u;
 
 	out->b_exists = bpf_core_field_exists(in->b);
-	if (bpf_core_field_exists(in->b))
+	if (bpf_core_field_exists(struct core_reloc_existence, b))
 		out->b_value = BPF_CORE_READ(in, b);
 	else
 		out->b_value = 0xff000002u;
 
 	out->c_exists = bpf_core_field_exists(in->c);
-	if (bpf_core_field_exists(in->c))
+	if (bpf_core_field_exists(struct core_reloc_existence, c))
 		out->c_value = BPF_CORE_READ(in, c);
 	else
 		out->c_value = 0xff000003u;
 
 	out->arr_exists = bpf_core_field_exists(in->arr);
-	if (bpf_core_field_exists(in->arr))
+	if (bpf_core_field_exists(struct core_reloc_existence, arr))
 		out->arr_value = BPF_CORE_READ(in, arr[0]);
 	else
 		out->arr_value = 0xff000004u;
 
 	out->s_exists = bpf_core_field_exists(in->s);
-	if (bpf_core_field_exists(in->s))
+	if (bpf_core_field_exists(struct core_reloc_existence, s))
 		out->s_value = BPF_CORE_READ(in, s.x);
 	else
 		out->s_value = 0xff000005u;
 
 	return 0;
 }
-
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index 7b2d576aeea1..5b686053ce42 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -15,13 +15,21 @@ struct {
 
 struct core_reloc_size_output {
 	int int_sz;
+	int int_off;
 	int struct_sz;
+	int struct_off;
 	int union_sz;
+	int union_off;
 	int arr_sz;
+	int arr_off;
 	int arr_elem_sz;
+	int arr_elem_off;
 	int ptr_sz;
+	int ptr_off;
 	int enum_sz;
+	int enum_off;
 	int float_sz;
+	int float_off;
 };
 
 struct core_reloc_size {
@@ -41,13 +49,28 @@ int test_core_size(void *ctx)
 	struct core_reloc_size_output *out = (void *)&data.out;
 
 	out->int_sz = bpf_core_field_size(in->int_field);
+	out->int_off = bpf_core_field_offset(in->int_field);
+
 	out->struct_sz = bpf_core_field_size(in->struct_field);
+	out->struct_off = bpf_core_field_offset(in->struct_field);
+
 	out->union_sz = bpf_core_field_size(in->union_field);
+	out->union_off = bpf_core_field_offset(in->union_field);
+
 	out->arr_sz = bpf_core_field_size(in->arr_field);
-	out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
-	out->ptr_sz = bpf_core_field_size(in->ptr_field);
-	out->enum_sz = bpf_core_field_size(in->enum_field);
-	out->float_sz = bpf_core_field_size(in->float_field);
+	out->arr_off = bpf_core_field_offset(in->arr_field);
+
+	out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[1]);
+	out->arr_elem_off = bpf_core_field_offset(struct core_reloc_size, arr_field[1]);
+
+	out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field);
+	out->ptr_off = bpf_core_field_offset(struct core_reloc_size, ptr_field);
+
+	out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field);
+	out->enum_off = bpf_core_field_offset(struct core_reloc_size, enum_field);
+
+	out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field);
+	out->float_off = bpf_core_field_offset(struct core_reloc_size, float_field);
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
new file mode 100644
index 000000000000..2b8b9b8ba018
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline int foo(int *p)
+{
+	return p ? (*p = 42) : 0;	/* non-NULL p: store 42 through it and return 42; NULL: return 0 */
+}
+
+const volatile int i;
+
+SEC("tc")
+int test_cls(struct __sk_buff *skb)
+{
+	return foo((int *)&i);	/* the cast drops const volatile from i, so foo() receives a plain int pointer to it */
+}
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
index 68d64c365f90..20ef9d433b97 100644
--- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -56,7 +56,7 @@ static void spin_lock_work(void)
 	}
 }
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int raw_tp_timer(void *ctx)
 {
 	timer_work();
@@ -64,7 +64,7 @@ int raw_tp_timer(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int tp_timer(void *ctx)
 {
 	timer_work();
@@ -72,7 +72,7 @@ int tp_timer(void *ctx)
 	return 0;
 }
 
-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
 int kprobe_timer(void *ctx)
 {
 	timer_work();
@@ -80,7 +80,7 @@ int kprobe_timer(void *ctx)
 	return 0;
 }
 
-SEC("perf_event")
+SEC("?perf_event")
 int perf_event_timer(void *ctx)
 {
 	timer_work();
@@ -88,7 +88,7 @@ int perf_event_timer(void *ctx)
 	return 0;
 }
 
-SEC("raw_tp/sys_enter")
+SEC("?raw_tp/sys_enter")
 int raw_tp_spin_lock(void *ctx)
 {
 	spin_lock_work();
@@ -96,7 +96,7 @@ int raw_tp_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("tp/syscalls/sys_enter_nanosleep")
+SEC("?tp/syscalls/sys_enter_nanosleep")
 int tp_spin_lock(void *ctx)
 {
 	spin_lock_work();
@@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("kprobe/sys_nanosleep")
+SEC("?kprobe/sys_nanosleep")
 int kprobe_spin_lock(void *ctx)
 {
 	spin_lock_work();
@@ -112,7 +112,7 @@ int kprobe_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("perf_event")
+SEC("?perf_event")
 int perf_event_spin_lock(void *ctx)
 {
 	spin_lock_work();
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
index 2180c41cd890..a72a5bf3812a 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -8,7 +8,7 @@
 extern const int bpf_prog_active __ksym; /* int type global var. */
 
 SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+int handler1(const void *ctx)
 {
 	int *active;
 	__u32 cpu;
@@ -26,4 +26,21 @@ int handler(const void *ctx)
 	return 0;
 }
 
+/* Stores 42 through p when p is non-NULL and returns 42; returns 0 for NULL. */
+__noinline int write_active(int *p)
+{
+	return p ? (*p = 42) : 0;
+}
+
+/* Hands this CPU's pointer to the const ksym bpf_prog_active to write_active(). */
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+	int *active;
+
+	active = bpf_this_cpu_ptr(&bpf_prog_active);
+	write_active(active);
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 19e4d2071c60..c8bc0c6947aa 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -218,7 +218,7 @@ static __noinline bool get_packet_dst(struct real_definition **real,
 
 	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
 	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
-		return 0;
+		return false;
 
 	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
 	if (!real_pos)
diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c
new file mode 100644
index 000000000000..60450cb0e72e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct___bad {
+	int pid;
+	int fake_field;
+	void *fake_field_subprog;
+} __attribute__((preserve_access_index));
+
+SEC("?raw_tp/sys_enter")
+int bad_relo(const void *ctx)
+{
+	static struct task_struct___bad *t;	/* never assigned in this function; only names the field below */
+
+	return bpf_core_field_size(t->fake_field);	/* size query on the fake_field member of task_struct___bad */
+}
+
+static __noinline int bad_subprog(void)
+{
+	static struct task_struct___bad *t;	/* never assigned in this function; only used for address arithmetic */
+
+	/* ugliness below is a field offset relocation */
+	return (void *)&t->fake_field_subprog - (void *)t;	/* byte distance from the struct start to fake_field_subprog */
+}
+
+SEC("?raw_tp/sys_enter")
+int bad_relo_subprog(const void *ctx)
+{
+	static struct task_struct___bad *t;	/* never assigned in this function; only names the field below */
+
+	return bad_subprog() + bpf_core_field_size(t->pid);	/* subprog's fake-field offset plus the size of the pid member */
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} existing_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} missing_map SEC(".maps");
+
+SEC("?raw_tp/sys_enter")
+int use_missing_map(const void *ctx)
+{
+	int zero = 0, *value;
+
+	value = bpf_map_lookup_elem(&existing_map, &zero);	/* result is discarded: value is overwritten just below */
+
+	value = bpf_map_lookup_elem(&missing_map, &zero);
+
+	return value != NULL;	/* 1 when the missing_map lookup returned a pointer, otherwise 0 */
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..ca827b1092da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 percpu_array_elem_sum = 0;
+__u64 percpu_hash_elem_sum = 0;
+__u64 percpu_lru_hash_elem_sum = 0;
+const volatile int nr_cpus;
+const volatile int my_pid;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} percpu_array_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u64);
+	__type(value, __u64);
+} percpu_hash_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u64);
+	__type(value, __u64);
+} percpu_lru_hash_map SEC(".maps");
+
+struct read_percpu_elem_ctx {
+	void *map;
+	__u64 sum;
+};
+
+static int read_percpu_elem_callback(__u32 index, struct read_percpu_elem_ctx *ctx)
+{
+	__u64 key = 0;	/* every lookup uses key 0 */
+	__u64 *value;
+
+	value = bpf_map_lookup_percpu_elem(ctx->map, &key, index);	/* the loop index is passed as the helper's third (cpu) argument */
+	if (value)
+		ctx->sum += *value;	/* accumulate only when the lookup produced a pointer */
+	return 0;	/* NOTE(review): presumably 0 tells bpf_loop to keep iterating - confirm */
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int sysenter_getuid(const void *ctx)
+{
+	struct read_percpu_elem_ctx map_ctx;	/* reused for all three maps; both fields are reset before each loop */
+
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))	/* compare my_pid with the upper 32 bits of the helper's value */
+		return 0;
+
+	map_ctx.map = &percpu_array_map;
+	map_ctx.sum = 0;
+	bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);	/* callback runs with index 0..nr_cpus-1 */
+	percpu_array_elem_sum = map_ctx.sum;
+
+	map_ctx.map = &percpu_hash_map;
+	map_ctx.sum = 0;
+	bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+	percpu_hash_elem_sum = map_ctx.sum;
+
+	map_ctx.map = &percpu_lru_hash_map;
+	map_ctx.sum = 0;
+	bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+	percpu_lru_hash_elem_sum = map_ctx.sum;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 50ce16d02da7..08628afedb77 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -64,7 +64,7 @@ int BPF_PROG(handle_fentry,
 
 __u32 fentry_manual_read_sz = 0;
 
-SEC("fentry/placeholder")
+SEC("fentry")
 int BPF_PROG(handle_fentry_manual,
 	     struct file *file, struct kobject *kobj,
 	     struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 0558544e1ff0..5cd7c096f62d 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -14,8 +14,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-#define barrier() __asm__ __volatile__("": : :"memory")
-
 /* llvm will optimize both subprograms into exactly the same BPF assembly
  *
  * Disassembly of section .text:
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index 197b86546dca..e416e0ce12b7 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,6 +15,8 @@ struct sample {
 
 struct ringbuf_map {
 	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	/* libbpf will adjust to valid page size */
+	__uint(max_entries, 1000);
 } ringbuf1 SEC(".maps"),
   ringbuf2 SEC(".maps");
 
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 02f79356d5eb..98c6493d9b91 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -89,7 +89,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
 static inline int
 handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
-	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
 	const int zero = 0;
 	size_t tuple_len;
@@ -121,7 +120,6 @@ assign:
 static inline int
 handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
-	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
 	const int zero = 0;
 	size_t tuple_len;
@@ -161,7 +159,7 @@ assign:
 SEC("tc")
 int bpf_sk_assign_test(struct __sk_buff *skb)
 {
-	struct bpf_sock_tuple *tuple, ln = {0};
+	struct bpf_sock_tuple *tuple;
 	bool ipv4 = false;
 	bool tcp = false;
 	int tuple_len;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index 40f161480a2f..b502e5c92e33 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -52,7 +52,7 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
 	return result;
 }
 
-SEC("tc")
+SEC("?tc")
 int sk_lookup_success(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
@@ -78,7 +78,7 @@ int sk_lookup_success(struct __sk_buff *skb)
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
 }
 
-SEC("tc")
+SEC("?tc")
 int sk_lookup_success_simple(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -90,7 +90,7 @@ int sk_lookup_success_simple(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_use_after_free(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -105,7 +105,7 @@ int err_use_after_free(struct __sk_buff *skb)
 	return family;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_modify_sk_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -120,7 +120,7 @@ int err_modify_sk_pointer(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -134,7 +134,7 @@ int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_no_release(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -143,7 +143,7 @@ int err_no_release(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_release_twice(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -155,7 +155,7 @@ int err_release_twice(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("tc")
+SEC("?tc")
 int err_release_unchecked(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
@@ -172,7 +172,7 @@ void lookup_no_release(struct __sk_buff *skb)
 	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("tc")
+SEC("?tc")
 int err_no_release_subcall(struct __sk_buff *skb)
 {
 	lookup_no_release(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
index b7c37ca09544..f8e9256cf18d 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -89,6 +89,11 @@ int prog2(void *ctx)
 	return 0;
 }
 
+static int empty_callback(__u32 index, void *data)
+{
+	return 0;
+}
+
 /* prog3 has the same section name as prog1 */
 SEC("raw_tp/sys_enter")
 int prog3(void *ctx)
@@ -98,6 +103,9 @@ int prog3(void *ctx)
 	if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
 		return 1;
 
+	/* test that ld_imm64 with BPF_PSEUDO_FUNC doesn't get blinded */
+	bpf_loop(1, empty_callback, NULL, 0);
+
 	res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
index e6cb09259408..1926facba122 100644
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -14,7 +14,7 @@ char current_regs[PT_REGS_SIZE] = {};
 char ctx_regs[PT_REGS_SIZE] = {};
 int uprobe_res = 0;
 
-SEC("uprobe/trigger_func")
+SEC("uprobe")
 int handle_uprobe(struct pt_regs *ctx)
 {
 	struct task_struct *current;
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index f030e469d05b..7765720da7d5 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -1,20 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <stddef.h>
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct task_struct;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, int *b)
+{
+	return 0;
+}
 
-SEC("fentry/__set_task_comm")
-int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
 {
 	return 0;
 }
 
-SEC("fexit/__set_task_comm")
-int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, int *b, int ret)
 {
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index ef0dde83b85a..17f2f325b3f3 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -21,10 +21,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-#define ERROR(ret) do {\
-		char fmt[] = "ERROR line:%d ret:%d\n";\
-		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
-	} while (0)
+#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
 
 struct geneve_opt {
 	__be16	opt_class;
@@ -40,8 +37,15 @@ struct vxlan_metadata {
 	__u32     gbp;
 };
 
-SEC("gre_set_tunnel")
-int _gre_set_tunnel(struct __sk_buff *skb)
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} local_ip_map SEC(".maps");
+
+SEC("tc")
+int gre_set_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -55,32 +59,31 @@ int _gre_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("gre_get_tunnel")
-int _gre_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int gre_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "key %d remote ip 0x%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+	bpf_printk("key %d remote ip 0x%x\n", key.tunnel_id, key.remote_ipv4);
 	return TC_ACT_OK;
 }
 
-SEC("ip6gretap_set_tunnel")
-int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	int ret;
@@ -96,35 +99,34 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
 				     BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
 				     BPF_F_SEQ_NUMBER);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip6gretap_get_tunnel")
-int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip6 ::%x label %x\n";
 	struct bpf_tunnel_key key;
 	int ret;
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+	bpf_printk("key %d remote ip6 ::%x label %x\n",
+		   key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
 
 	return TC_ACT_OK;
 }
 
-SEC("erspan_set_tunnel")
-int _erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
@@ -139,7 +141,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -159,17 +161,16 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("erspan_get_tunnel")
-int _erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
 	__u32 index;
@@ -177,38 +178,34 @@ int _erspan_get_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, md.version);
+	bpf_printk("key %d remote ip 0x%x erspan version %d\n",
+		   key.tunnel_id, key.remote_ipv4, md.version);
 
 #ifdef ERSPAN_V1
-	char fmt2[] = "\tindex %x\n";
-
 	index = bpf_ntohl(md.u.index);
-	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+	bpf_printk("\tindex %x\n", index);
 #else
-	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
-	bpf_trace_printk(fmt2, sizeof(fmt2),
-			 md.u.md2.dir,
-			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
-			 bpf_ntohl(md.u.md2.timestamp));
+	bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+		   md.u.md2.dir,
+		   (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+		   bpf_ntohl(md.u.md2.timestamp));
 #endif
 
 	return TC_ACT_OK;
 }
 
-SEC("ip4ip6erspan_set_tunnel")
-int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
@@ -223,7 +220,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -244,17 +241,16 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip4ip6erspan_get_tunnel")
-int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
 	struct bpf_tunnel_key key;
 	struct erspan_metadata md;
 	__u32 index;
@@ -263,44 +259,88 @@ int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, md.version);
+	bpf_printk("ip6erspan get key %d remote ip6 ::%x erspan version %d\n",
+		   key.tunnel_id, key.remote_ipv6[3], md.version); /* key was read with BPF_F_TUNINFO_IPV6: print the last IPv6 word, as the "::%x" format implies */
 
 #ifdef ERSPAN_V1
-	char fmt2[] = "\tindex %x\n";
-
 	index = bpf_ntohl(md.u.index);
-	bpf_trace_printk(fmt2, sizeof(fmt2), index);
+	bpf_printk("\tindex %x\n", index);
 #else
-	char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
-	bpf_trace_printk(fmt2, sizeof(fmt2),
-			 md.u.md2.dir,
-			 (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
-			 bpf_ntohl(md.u.md2.timestamp));
+	bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+		   md.u.md2.dir,
+		   (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
+		   bpf_ntohl(md.u.md2.timestamp));
 #endif
 
 	return TC_ACT_OK;
 }
 
-SEC("vxlan_set_tunnel")
-int _vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_set_tunnel_dst(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
 	struct vxlan_metadata md;
+	__u32 index = 0;
+	__u32 *local_ip = NULL;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(-1); /* ret is still unassigned here; the lookup yields no error code */
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv4 = 0xac100164; /* 172.16.1.100 */
+	key.remote_ipv4 = *local_ip;
+	key.tunnel_id = 2;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_ZERO_CSUM_TX);
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("tc")
+int vxlan_set_tunnel_src(struct __sk_buff *skb)
+{
+	int ret;
+	struct bpf_tunnel_key key;
+	struct vxlan_metadata md;
+	__u32 index = 0;
+	__u32 *local_ip = NULL;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(-1); /* ret is still unassigned here; the lookup yields no error code */
+		return TC_ACT_SHOT;
+	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv4 = *local_ip;
 	key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
 	key.tunnel_id = 2;
 	key.tunnel_tos = 0;
@@ -309,53 +349,106 @@ int _vxlan_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
 	ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("vxlan_get_tunnel")
-int _vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc") /* checks the VXLAN key's local IPv4 and GBP; TC_ACT_SHOT on mismatch */
+int vxlan_get_tunnel_src(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
 	struct vxlan_metadata md;
-	char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
+	__u32 index = 0; /* local_ip_map key looked up below */
+	__u32 *local_ip = NULL;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(1); /* 'ret' is not assigned yet; log a constant instead */
+		return TC_ACT_SHOT;
+	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, md.gbp);
+	if (key.local_ipv4 != *local_ip || md.gbp != 0x800FF) {
+		bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n",
+			   key.tunnel_id, key.local_ipv4,
+			   key.remote_ipv4, md.gbp);
+		bpf_printk("local_ip 0x%x\n", *local_ip);
+		log_err(ret); /* ret is the (non-negative) bpf_skb_get_tunnel_opt() result here */
+		return TC_ACT_SHOT;
+	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip6vxlan_set_tunnel")
-int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc") /* sets IPv6 VXLAN tunnel key: local ::11, remote word 3 from local_ip_map */
+int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	int ret;
+	__u32 index = 0; /* local_ip_map key looked up below */
+	__u32 *local_ip;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(1); /* 'ret' is not assigned yet; log a constant instead */
+		return TC_ACT_SHOT;
+	}
 
 	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+	key.remote_ipv6[3] = bpf_htonl(*local_ip); /* map value is converted to network order */
+	key.tunnel_id = 22;
+	key.tunnel_tos = 0;
+	key.tunnel_ttl = 64;
+
+	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_IPV6);
+	if (ret < 0) {
+		log_err(ret);
+		return TC_ACT_SHOT;
+	}
+
+	return TC_ACT_OK;
+}
+
+SEC("tc") /* sets IPv6 VXLAN tunnel key: local word 3 from local_ip_map, remote ::11 */
+int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
+{
+	struct bpf_tunnel_key key;
+	int ret;
+	__u32 index = 0; /* local_ip_map key looked up below */
+	__u32 *local_ip;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(1); /* 'ret' is not assigned yet; log a constant instead */
+		return TC_ACT_SHOT;
+	}
+
+	__builtin_memset(&key, 0x0, sizeof(key));
+	key.local_ipv6[3] = bpf_htonl(*local_ip); /* map value is converted to network order */
 	key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
 	key.tunnel_id = 22;
 	key.tunnel_tos = 0;
@@ -364,35 +457,48 @@ int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip6vxlan_get_tunnel")
-int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc") /* checks the IPv6 VXLAN key's local address word 3; TC_ACT_SHOT on mismatch */
+int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip6 ::%x label %x\n";
 	struct bpf_tunnel_key key;
 	int ret;
+	__u32 index = 0; /* local_ip_map key looked up below */
+	__u32 *local_ip;
+
+	local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+	if (!local_ip) {
+		log_err(1); /* 'ret' is not assigned yet; log a constant instead */
+		return TC_ACT_SHOT;
+	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			 key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+	if (bpf_ntohl(key.local_ipv6[3]) != *local_ip) {
+		bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x\n",
+			   key.tunnel_id, bpf_ntohl(key.local_ipv6[3]),
+			   bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label);
+		bpf_printk("local_ip 0x%x\n", *local_ip);
+		log_err(ret); /* ret is the (non-negative) bpf_skb_get_tunnel_key() result here */
+		return TC_ACT_SHOT;
+	}
 
 	return TC_ACT_OK;
 }
 
-SEC("geneve_set_tunnel")
-int _geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_set_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
@@ -416,30 +522,29 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_ZERO_CSUM_TX);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("geneve_get_tunnel")
-int _geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
-	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -447,13 +552,13 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
 	if (ret < 0)
 		gopt.opt_class = 0;
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+		   key.tunnel_id, key.remote_ipv4, gopt.opt_class);
 	return TC_ACT_OK;
 }
 
-SEC("ip6geneve_set_tunnel")
-int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
@@ -468,7 +573,7 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -483,17 +588,16 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip6geneve_get_tunnel")
-int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_get_tunnel(struct __sk_buff *skb)
 {
-	char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
 	struct bpf_tunnel_key key;
 	struct geneve_opt gopt;
 	int ret;
@@ -501,7 +605,7 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
@@ -509,14 +613,14 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
 	if (ret < 0)
 		gopt.opt_class = 0;
 
-	bpf_trace_printk(fmt, sizeof(fmt),
-			key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+	bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+		   key.tunnel_id, key.remote_ipv4, gopt.opt_class);
 
 	return TC_ACT_OK;
 }
 
-SEC("ipip_set_tunnel")
-int _ipip_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
@@ -526,7 +630,7 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 
 	/* single length check */
 	if (data + sizeof(*iph) > data_end) {
-		ERROR(1);
+		log_err(1);
 		return TC_ACT_SHOT;
 	}
 
@@ -537,32 +641,31 @@ int _ipip_set_tunnel(struct __sk_buff *skb)
 
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ipip_get_tunnel")
-int _ipip_get_tunnel(struct __sk_buff *skb)
+SEC("tc") /* reads the tunnel key and traces its remote IPv4 address */
+int ipip_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "remote ip 0x%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret); /* pass the helper's negative return code to log_err() */
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+	bpf_printk("remote ip 0x%x\n", key.remote_ipv4); /* trace only; value is not checked */
 	return TC_ACT_OK;
 }
 
-SEC("ipip6_set_tunnel")
-int _ipip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip6_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
@@ -572,7 +675,7 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
 
 	/* single length check */
 	if (data + sizeof(*iph) > data_end) {
-		ERROR(1);
+		log_err(1);
 		return TC_ACT_SHOT;
 	}
 
@@ -585,34 +688,33 @@ int _ipip6_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ipip6_get_tunnel")
-int _ipip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc") /* reads the IPv6 tunnel key and traces words 0 and 3 of the remote address */
+int ipip6_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "remote ip6 %x::%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
-			 bpf_htonl(key.remote_ipv6[3]));
+	bpf_printk("remote ip6 %x::%x\n", bpf_ntohl(key.remote_ipv6[0]),
+		   bpf_ntohl(key.remote_ipv6[3])); /* network -> host order for printing */
 	return TC_ACT_OK;
 }
 
-SEC("ip6ip6_set_tunnel")
-int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_set_tunnel(struct __sk_buff *skb)
 {
 	struct bpf_tunnel_key key = {};
 	void *data = (void *)(long)skb->data;
@@ -622,7 +724,7 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
 
 	/* single length check */
 	if (data + sizeof(*iph) > data_end) {
-		ERROR(1);
+		log_err(1);
 		return TC_ACT_SHOT;
 	}
 
@@ -634,45 +736,44 @@ int _ip6ip6_set_tunnel(struct __sk_buff *skb)
 	ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
 	return TC_ACT_OK;
 }
 
-SEC("ip6ip6_get_tunnel")
-int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc") /* reads the IPv6 tunnel key and traces words 0 and 3 of the remote address */
+int ip6ip6_get_tunnel(struct __sk_buff *skb)
 {
 	int ret;
 	struct bpf_tunnel_key key;
-	char fmt[] = "remote ip6 %x::%x\n";
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
 				     BPF_F_TUNINFO_IPV6);
 	if (ret < 0) {
-		ERROR(ret);
+		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
-			 bpf_htonl(key.remote_ipv6[3]));
+	bpf_printk("remote ip6 %x::%x\n", bpf_ntohl(key.remote_ipv6[0]),
+		   bpf_ntohl(key.remote_ipv6[3])); /* network -> host order for printing */
 	return TC_ACT_OK;
 }
 
-SEC("xfrm_get_state")
-int _xfrm_get_state(struct __sk_buff *skb)
+SEC("tc") /* traces reqid, spi and remote IPv4 of xfrm state index 0; always TC_ACT_OK */
+int xfrm_get_state(struct __sk_buff *skb)
 {
 	struct bpf_xfrm_state x;
-	char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
 	int ret;
 
 	ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
 	if (ret < 0)
 		return TC_ACT_OK;
 
-	bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
-			 bpf_ntohl(x.remote_ipv4));
+	bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n",
+		   x.reqid, bpf_ntohl(x.spi), /* spi and address converted to host order */
+		   bpf_ntohl(x.remote_ipv4));
 	return TC_ACT_OK;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..fc423e43a3cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+__u32 perfbuf_val = 0;
+__u32 ringbuf_val = 0;
+
+int test_pid;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} percpu_hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+} perfbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep") /* emits one perfbuf and one ringbuf record */
+int sys_nanosleep_enter(void *ctx)
+{
+	int cur_pid;
+
+	cur_pid = bpf_get_current_pid_tgid() >> 32; /* upper 32 bits of the helper's result */
+
+	if (cur_pid != test_pid)
+		return 0; /* caller is not test_pid: emit nothing */
+
+	bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, &perfbuf_val, sizeof(perfbuf_val));
+	bpf_ringbuf_output(&ringbuf, &ringbuf_val, sizeof(ringbuf_val), 0);
+
+	return 0; /* results of the two output helpers above are not checked */
+}
+
+SEC("perf_event")
+int handle_perf_event(void *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
new file mode 100644
index 000000000000..ab75522e2eeb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int uprobe_byname_parm1 = 0;
+int uprobe_byname_ran = 0;
+int uretprobe_byname_rc = 0;
+int uretprobe_byname_ran = 0;
+size_t uprobe_byname2_parm1 = 0;
+int uprobe_byname2_ran = 0;
+char *uretprobe_byname2_rc = NULL;
+int uretprobe_byname2_ran = 0;
+
+int test_pid;
+
+/* This program cannot auto-attach, but that should not stop other
+ * programs from attaching.
+ */
+SEC("uprobe")
+int handle_uprobe_noautoattach(struct pt_regs *ctx)
+{
+	return 0;
+}
+
+SEC("uprobe//proc/self/exe:autoattach_trigger_func") /* target binary:function in section name */
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+	uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx); /* first argument of the probed function */
+	uprobe_byname_ran = 1; /* each handler in this file stores a different marker (1-4) */
+	return 0;
+}
+
+SEC("uretprobe//proc/self/exe:autoattach_trigger_func") /* return probe on the same function */
+int handle_uretprobe_byname(struct pt_regs *ctx)
+{
+	uretprobe_byname_rc = PT_REGS_RC_CORE(ctx); /* return value of the probed function */
+	uretprobe_byname_ran = 2; /* each handler in this file stores a different marker (1-4) */
+	return 0;
+}
+
+
+SEC("uprobe/libc.so.6:malloc") /* library given by name only, no path */
+int handle_uprobe_byname2(struct pt_regs *ctx)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32; /* upper 32 bits of the helper's result */
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid)
+		return 0;
+	uprobe_byname2_parm1 = PT_REGS_PARM1_CORE(ctx); /* first argument passed to malloc */
+	uprobe_byname2_ran = 3; /* each handler in this file stores a different marker (1-4) */
+	return 0;
+}
+
+SEC("uretprobe/libc.so.6:malloc") /* return probe on the same libc function */
+int handle_uretprobe_byname2(struct pt_regs *ctx)
+{
+	int pid = bpf_get_current_pid_tgid() >> 32; /* upper 32 bits of the helper's result */
+
+	/* ignore irrelevant invocations */
+	if (test_pid != pid)
+		return 0;
+	uretprobe_byname2_rc = (char *)PT_REGS_RC_CORE(ctx); /* pointer returned by malloc */
+	uretprobe_byname2_ran = 4; /* each handler in this file stores a different marker (1-4) */
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
new file mode 100644
index 000000000000..3539b02bd5f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int urand_pid;
+
+int urand_read_without_sema_call_cnt;
+int urand_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_without_sema") /* binary path : provider : probe name */
+int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than urand_pid */
+
+	__sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1); /* count this hit */
+	__sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz); /* only buf_sz is used */
+
+	return 0;
+}
+
+int urand_read_with_sema_call_cnt;
+int urand_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_with_sema") /* binary path : provider : probe name */
+int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than urand_pid */
+
+	__sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1); /* count this hit */
+	__sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz); /* only buf_sz is used */
+
+	return 0;
+}
+
+int urandlib_read_without_sema_call_cnt;
+int urandlib_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_without_sema") /* probe lives in the shared lib */
+int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than urand_pid */
+
+	__sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1); /* count this hit */
+	__sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz); /* only buf_sz is used */
+
+	return 0;
+}
+
+int urandlib_read_with_sema_call_cnt;
+int urandlib_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_with_sema") /* probe lives in the shared lib */
+int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+	if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than urand_pid */
+
+	__sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1); /* count this hit */
+	__sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz); /* only buf_sz is used */
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
new file mode 100644
index 000000000000..505aab9a5234
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int my_pid;
+
+int usdt0_called;
+u64 usdt0_cookie;
+int usdt0_arg_cnt;
+int usdt0_arg_ret;
+
+SEC("usdt") /* no binary/provider/name in the section string */
+int usdt0(struct pt_regs *ctx)
+{
+	long tmp; /* scratch output for bpf_usdt_arg(); its value is never read */
+
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than my_pid */
+
+	__sync_fetch_and_add(&usdt0_called, 1);
+
+	usdt0_cookie = bpf_usdt_cookie(ctx);
+	usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
+	/* should return -ENOENT for any arg_num */
+	usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp); /* random arg_num */
+	return 0;
+}
+
+int usdt3_called;
+u64 usdt3_cookie;
+int usdt3_arg_cnt;
+int usdt3_arg_rets[3];
+u64 usdt3_args[3];
+
+SEC("usdt//proc/self/exe:test:usdt3") /* binary path : provider : probe name */
+int usdt3(struct pt_regs *ctx)
+{
+	long tmp; /* receives each argument from bpf_usdt_arg() in turn */
+
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than my_pid */
+
+	__sync_fetch_and_add(&usdt3_called, 1);
+
+	usdt3_cookie = bpf_usdt_cookie(ctx);
+	usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+	usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
+	usdt3_args[0] = (int)tmp; /* narrowed to int, then sign-extended into the u64 slot */
+
+	usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
+	usdt3_args[1] = (long)tmp; /* stored at full width */
+
+	usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
+	usdt3_args[2] = (uintptr_t)tmp; /* stored as an unsigned pointer-sized value */
+
+	return 0; /* per-argument return codes are left in usdt3_arg_rets[] */
+}
+
+int usdt12_called;
+u64 usdt12_cookie;
+int usdt12_arg_cnt;
+u64 usdt12_args[12];
+
+SEC("usdt//proc/self/exe:test:usdt12") /* binary path : provider : probe name */
+int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
+		     long a6, __u64 a7, uintptr_t a8, int a9, short a10,
+		     short a11, signed char a12)
+{
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0; /* hit came from a process other than my_pid */
+
+	__sync_fetch_and_add(&usdt12_called, 1);
+
+	usdt12_cookie = bpf_usdt_cookie(ctx); /* ctx presumably comes from BPF_USDT() - confirm */
+	usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+	usdt12_args[0] = a1; /* each typed argument is widened into its own u64 slot */
+	usdt12_args[1] = a2;
+	usdt12_args[2] = a3;
+	usdt12_args[3] = a4;
+	usdt12_args[4] = a5;
+	usdt12_args[5] = a6;
+	usdt12_args[6] = a7;
+	usdt12_args[7] = a8;
+	usdt12_args[8] = a9;
+	usdt12_args[9] = a10;
+	usdt12_args[10] = a11;
+	usdt12_args[11] = a12;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
new file mode 100644
index 000000000000..aa6de32b50d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+/* this file is linked together with test_usdt.c to validate that usdt.bpf.h
+ * can be included in multiple .bpf.c files forming single final BPF object
+ * file
+ */
+
+extern int my_pid;
+
+int usdt_100_called;
+int usdt_100_sum;
+
+SEC("usdt//proc/self/exe:test:usdt_100")
+int BPF_USDT(usdt_100, int x)
+{
+	/* Ignore hits from any process other than my_pid. */
+	if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
+
+	/* Atomically count the hit and accumulate the probe argument. */
+	__sync_fetch_and_add(&usdt_100_called, 1);
+	__sync_fetch_and_add(&usdt_100_sum, x);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 596c4e71bf3a..125d872d7981 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -564,22 +564,22 @@ static bool get_packet_dst(struct real_definition **real,
 	hash = get_packet_hash(pckt, hash_16bytes);
 	if (hash != 0x358459b7 /* jhash of ipv4 packet */  &&
 	    hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
-		return 0;
+		return false;
 	key = 2 * vip_info->vip_num + hash % 2;
 	real_pos = bpf_map_lookup_elem(&ch_rings, &key);
 	if (!real_pos)
-		return 0;
+		return false;
 	key = *real_pos;
 	*real = bpf_map_lookup_elem(&reals, &key);
 	if (!(*real))
-		return 0;
+		return false;
 	if (!(vip_info->flags & (1 << 1))) {
 		__u32 conn_rate_key = 512 + 2;
 		struct lb_stats *conn_rate_stats =
 		    bpf_map_lookup_elem(&stats, &conn_rate_key);
 
 		if (!conn_rate_stats)
-			return 1;
+			return true;
 		cur_time = bpf_ktime_get_ns();
 		if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
 			conn_rate_stats->v1 = 1;
@@ -587,14 +587,14 @@ static bool get_packet_dst(struct real_definition **real,
 		} else {
 			conn_rate_stats->v1 += 1;
 			if (conn_rate_stats->v1 >= 1)
-				return 1;
+				return true;
 		}
 		if (pckt->flow.proto == IPPROTO_UDP)
 			new_dst_lru.atime = cur_time;
 		new_dst_lru.pos = key;
 		bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
 	}
-	return 1;
+	return true;
 }
 
 __attribute__ ((noinline))
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2ab049b54d6c..694e7cec1823 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -54,7 +54,7 @@ int bench_trigger_fmodret(void *ctx)
 	return -22;
 }
 
-SEC("uprobe/self/uprobe_target")
+SEC("uprobe")
 int bench_trigger_uprobe(void *ctx)
 {
 	__sync_add_and_fetch(&hits, 1);
diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h
new file mode 100644
index 000000000000..733045a52771
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt-config.h
@@ -0,0 +1,6 @@
+/* includes/sys/sdt-config.h.  Generated from sdt-config.h.in by configure.
+
+   This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
+   indicate whether the assembler supports "?" in .pushsection directives.  */
+
+#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
new file mode 100644
index 000000000000..ca0162b4dc57
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -0,0 +1,513 @@
+/* <sys/sdt.h> - Systemtap static probe definition macros.
+
+   This file is dedicated to the public domain, pursuant to CC0
+   (https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H    1
+
+/*
+  This file defines a family of macros
+
+       STAP_PROBEn(op1, ..., opn)
+
+  that emit a nop into the instruction stream, and some data into an auxiliary
+  note section.  The data in the note section describes the operands, in terms
+  of size and location.  Each location is encoded as assembler operand string.
+  Consumer tools such as gdb or systemtap insert breakpoints on top of
+  the nop, and decode the location operand-strings, like an assembler,
+  to find the values being passed.
+
+  The operand strings are selected by the compiler for each operand.
+  They are constrained by gcc inline-assembler codes.  The default is:
+
+  #define STAP_SDT_ARG_CONSTRAINT nor
+
+  This is a good default if the operands tend to be integral and
+  moderate in number (smaller than number of registers).  In other
+  cases, the compiler may report "'asm' requires impossible reload" or
+  similar.  In this case, consider simplifying the macro call (fewer
+  and simpler operands), reduce optimization, or override the default
+  constraints string via:
+
+  #define STAP_SDT_ARG_CONSTRAINT g
+  #include <sys/sdt.h>
+
+  See also:
+  https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+  https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+
+
+
+#ifdef __ASSEMBLER__
+# define _SDT_PROBE(provider, name, n, arglist)	\
+  _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \
+  _SDT_ASM_BASE
+# define _SDT_ASM_1(x)			x;
+# define _SDT_ASM_2(a, b)		a,b;
+# define _SDT_ASM_3(a, b, c)		a,b,c;
+# define _SDT_ASM_5(a, b, c, d, e)	a,b,c,d,e;
+# define _SDT_ASM_STRING_1(x)		.asciz #x;
+# define _SDT_ASM_SUBSTR_1(x)		.ascii #x;
+# define _SDT_DEPAREN_0()				/* empty */
+# define _SDT_DEPAREN_1(a)				a
+# define _SDT_DEPAREN_2(a,b)				a b
+# define _SDT_DEPAREN_3(a,b,c)				a b c
+# define _SDT_DEPAREN_4(a,b,c,d)			a b c d
+# define _SDT_DEPAREN_5(a,b,c,d,e)			a b c d e
+# define _SDT_DEPAREN_6(a,b,c,d,e,f)			a b c d e f
+# define _SDT_DEPAREN_7(a,b,c,d,e,f,g)			a b c d e f g
+# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h)		a b c d e f g h
+# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i)		a b c d e f g h i
+# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j)		a b c d e f g h i j
+# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k)		a b c d e f g h i j k
+# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l)	a b c d e f g h i j k l
+#else
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \
+  __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore));
+#else
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name)
+#endif
+
+# define _SDT_PROBE(provider, name, n, arglist) \
+  do {									    \
+    _SDT_NOTE_SEMAPHORE_USE(provider, name); \
+    __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \
+			  :: _SDT_ASM_OPERANDS_##n arglist);		    \
+    __asm__ __volatile__ (_SDT_ASM_BASE);				    \
+  } while (0)
+# define _SDT_S(x)			#x
+# define _SDT_ASM_1(x)			_SDT_S(x) "\n"
+# define _SDT_ASM_2(a, b)		_SDT_S(a) "," _SDT_S(b) "\n"
+# define _SDT_ASM_3(a, b, c)		_SDT_S(a) "," _SDT_S(b) "," \
+					_SDT_S(c) "\n"
+# define _SDT_ASM_5(a, b, c, d, e)	_SDT_S(a) "," _SDT_S(b) "," \
+					_SDT_S(c) "," _SDT_S(d) "," \
+					_SDT_S(e) "\n"
+# define _SDT_ASM_ARGS(n)		_SDT_ASM_TEMPLATE_##n
+# define _SDT_ASM_STRING_1(x)		_SDT_ASM_1(.asciz #x)
+# define _SDT_ASM_SUBSTR_1(x)		_SDT_ASM_1(.ascii #x)
+
+# define _SDT_ARGFMT(no)                _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \
+                                        _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \
+                                        _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \
+                                        _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no))
+
+
+# ifndef STAP_SDT_ARG_CONSTRAINT
+# if defined __powerpc__
+# define STAP_SDT_ARG_CONSTRAINT        nZr
+# elif defined __arm__
+# define STAP_SDT_ARG_CONSTRAINT        g
+# else
+# define STAP_SDT_ARG_CONSTRAINT        nor
+# endif
+# endif
+
+# define _SDT_STRINGIFY(x)              #x
+# define _SDT_ARG_CONSTRAINT_STRING(x)  _SDT_STRINGIFY(x)
+/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler
+   macros _SDT_SIZE and _SDT_TYPE */
+# define _SDT_ARG(n, x)				    \
+  [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \
+  [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x))
+#endif
+#define _SDT_ASM_STRING(x)		_SDT_ASM_STRING_1(x)
+#define _SDT_ASM_SUBSTR(x)		_SDT_ASM_SUBSTR_1(x)
+
+#define _SDT_ARGARRAY(x)	(__builtin_classify_type (x) == 14	\
+				 || __builtin_classify_type (x) == 5)
+
+#ifdef __cplusplus
+# define _SDT_ARGSIGNED(x)	(!_SDT_ARGARRAY (x) \
+				 && __sdt_type<__typeof (x)>::__sdt_signed)
+# define _SDT_ARGSIZE(x)	(_SDT_ARGARRAY (x) \
+				 ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x)		(x)
+
+# include <cstddef>
+
+template<typename __sdt_T>
+struct __sdt_type
+{
+  static const bool __sdt_signed = false;
+};
+  
+#define __SDT_ALWAYS_SIGNED(T) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = true; };
+#define __SDT_COND_SIGNED(T,CT)						\
+template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); };
+__SDT_ALWAYS_SIGNED(signed char)
+__SDT_ALWAYS_SIGNED(short)
+__SDT_ALWAYS_SIGNED(int)
+__SDT_ALWAYS_SIGNED(long)
+__SDT_ALWAYS_SIGNED(long long)
+__SDT_ALWAYS_SIGNED(volatile signed char)
+__SDT_ALWAYS_SIGNED(volatile short)
+__SDT_ALWAYS_SIGNED(volatile int)
+__SDT_ALWAYS_SIGNED(volatile long)
+__SDT_ALWAYS_SIGNED(volatile long long)
+__SDT_ALWAYS_SIGNED(const signed char)
+__SDT_ALWAYS_SIGNED(const short)
+__SDT_ALWAYS_SIGNED(const int)
+__SDT_ALWAYS_SIGNED(const long)
+__SDT_ALWAYS_SIGNED(const long long)
+__SDT_ALWAYS_SIGNED(const volatile signed char)
+__SDT_ALWAYS_SIGNED(const volatile short)
+__SDT_ALWAYS_SIGNED(const volatile int)
+__SDT_ALWAYS_SIGNED(const volatile long)
+__SDT_ALWAYS_SIGNED(const volatile long long)
+__SDT_COND_SIGNED(char, char)
+__SDT_COND_SIGNED(wchar_t, wchar_t)
+__SDT_COND_SIGNED(volatile char, char)
+__SDT_COND_SIGNED(volatile wchar_t, wchar_t)
+__SDT_COND_SIGNED(const char, char)
+__SDT_COND_SIGNED(const wchar_t, wchar_t)
+__SDT_COND_SIGNED(const volatile char, char)
+__SDT_COND_SIGNED(const volatile wchar_t, wchar_t)
+#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+/* __SDT_COND_SIGNED(char16_t) */
+/* __SDT_COND_SIGNED(char32_t) */
+#endif
+
+template<typename __sdt_E>
+struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {};
+
+template<typename __sdt_E, size_t __sdt_N>
+struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {};
+
+#elif !defined(__ASSEMBLER__)
+__extension__ extern unsigned long long __sdt_unsp;
+# define _SDT_ARGINTTYPE(x)						\
+  __typeof (__builtin_choose_expr (((__builtin_classify_type (x)	\
+				     + 3) & -4) == 4, (x), 0U))
+# define _SDT_ARGSIGNED(x)						\
+  (!__extension__							\
+   (__builtin_constant_p ((((unsigned long long)			\
+			    (_SDT_ARGINTTYPE (x)) __sdt_unsp)		\
+			   & ((unsigned long long)1 << (sizeof (unsigned long long)	\
+				       * __CHAR_BIT__ - 1))) == 0)	\
+    || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0))
+# define _SDT_ARGSIZE(x)	\
+  (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x)		(x)
+#endif
+
+#if defined __powerpc__ || defined __powerpc64__
+# define _SDT_ARGTMPL(id)	%I[id]%[id]
+#elif defined __i386__
+# define _SDT_ARGTMPL(id)	%k[id]  /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+# define _SDT_ARGTMPL(id)	%[id]
+#endif
+
+/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+   operand note format.
+
+   The named register may be a longer or shorter (!) alias for the
+   storage where the value in question is found.  For example, on
+   i386, 64-bit value may be put in register pairs, and the register
+   name stored would identify just one of them.  Previously, gcc was
+   asked to emit the %w[id] (16-bit alias of some registers holding
+   operands), even when a wider 32-bit value was used.
+
+   Bottom line: the byte-width given before the @ sign governs.  If
+   there is a mismatch between that width and that of the named
+   register, then a sys/sdt.h note consumer may need to employ
+   architecture-specific heuristics to figure out where the compiler
+   has actually put the complete value.
+*/
+
+#ifdef __LP64__
+# define _SDT_ASM_ADDR	.8byte	/* directive used to emit each address field of the note */
+#else /* !__LP64__ */
+# define _SDT_ASM_ADDR	.4byte	/* same role, 4-byte field when __LP64__ is not defined */
+#endif /* __LP64__ */
+
+/* The ia64 and s390 nop instructions take an argument. */
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define _SDT_NOP	nop 0
+#else
+#define _SDT_NOP	nop
+#endif
+
+#define _SDT_NOTE_NAME	"stapsdt"
+#define _SDT_NOTE_TYPE	3
+
+/* If the assembler supports the necessary feature, then we can play
+   nice with code in COMDAT sections, which comes up in C++ code.
+   Without that assembler support, some combinations of probe placements
+   in certain kinds of C++ code may produce link-time errors.  */
+#include "sdt-config.h"
+#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT
+# define _SDT_ASM_AUTOGROUP "?"
+#else
+# define _SDT_ASM_AUTOGROUP ""
+#endif
+
+#define _SDT_DEF_MACROS							     \
+	_SDT_ASM_1(.altmacro)						     \
+	_SDT_ASM_1(.macro _SDT_SIGN x)				     	     \
+	_SDT_ASM_3(.pushsection .note.stapsdt,"","note")		     \
+	_SDT_ASM_1(.iflt \\x)						     \
+	_SDT_ASM_1(.ascii "-")						     \
+	_SDT_ASM_1(.endif)						     \
+	_SDT_ASM_1(.popsection)						     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_SIZE_ x)					     \
+	_SDT_ASM_3(.pushsection .note.stapsdt,"","note")		     \
+	_SDT_ASM_1(.ascii "\x")						     \
+	_SDT_ASM_1(.popsection)						     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_SIZE x)					     \
+	_SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8))	     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_TYPE_ x)				             \
+	_SDT_ASM_3(.pushsection .note.stapsdt,"","note")		     \
+	_SDT_ASM_2(.ifc 8,\\x)					     	     \
+	_SDT_ASM_1(.ascii "f")						     \
+	_SDT_ASM_1(.endif)						     \
+	_SDT_ASM_1(.ascii "@")						     \
+	_SDT_ASM_1(.popsection)						     \
+	_SDT_ASM_1(.endm)						     \
+	_SDT_ASM_1(.macro _SDT_TYPE x)				     	     \
+	_SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff)))			     \
+	_SDT_ASM_1(.endm)
+
+#define _SDT_UNDEF_MACROS						      \
+  _SDT_ASM_1(.purgem _SDT_SIGN)						      \
+  _SDT_ASM_1(.purgem _SDT_SIZE_)					      \
+  _SDT_ASM_1(.purgem _SDT_SIZE)						      \
+  _SDT_ASM_1(.purgem _SDT_TYPE_)					      \
+  _SDT_ASM_1(.purgem _SDT_TYPE)
+
+#define _SDT_ASM_BODY(provider, name, pack_args, args, ...)		      \
+  _SDT_DEF_MACROS							      \
+  _SDT_ASM_1(990:	_SDT_NOP)					      \
+  _SDT_ASM_3(		.pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
+  _SDT_ASM_1(		.balign 4)					      \
+  _SDT_ASM_3(		.4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE)	      \
+  _SDT_ASM_1(991:	.asciz _SDT_NOTE_NAME)				      \
+  _SDT_ASM_1(992:	.balign 4)					      \
+  _SDT_ASM_1(993:	_SDT_ASM_ADDR 990b)				      \
+  _SDT_ASM_1(		_SDT_ASM_ADDR _.stapsdt.base)			      \
+  _SDT_SEMAPHORE(provider,name)						      \
+  _SDT_ASM_STRING(provider)						      \
+  _SDT_ASM_STRING(name)							      \
+  pack_args args							      \
+  _SDT_ASM_SUBSTR(\x00)							      \
+  _SDT_UNDEF_MACROS							      \
+  _SDT_ASM_1(994:	.balign 4)					      \
+  _SDT_ASM_1(		.popsection)
+
+#define _SDT_ASM_BASE							      \
+  _SDT_ASM_1(.ifndef _.stapsdt.base)					      \
+  _SDT_ASM_5(		.pushsection .stapsdt.base,"aG","progbits",	      \
+							.stapsdt.base,comdat) \
+  _SDT_ASM_1(		.weak _.stapsdt.base)				      \
+  _SDT_ASM_1(		.hidden _.stapsdt.base)				      \
+  _SDT_ASM_1(	_.stapsdt.base: .space 1)				      \
+  _SDT_ASM_2(		.size _.stapsdt.base, 1)			      \
+  _SDT_ASM_1(		.popsection)					      \
+  _SDT_ASM_1(.endif)
+
+#if defined _SDT_HAS_SEMAPHORES /* note field holds the address of <p>_<n>_semaphore */
+#define _SDT_SEMAPHORE(p,n) \
+	_SDT_ASM_1(		_SDT_ASM_ADDR p##_##n##_semaphore)
+#else /* no semaphores: the same note field is emitted as address 0 */
+#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1(		_SDT_ASM_ADDR 0)
+#endif /* _SDT_HAS_SEMAPHORES */
+
+#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20)
+#define _SDT_ASM_TEMPLATE_0		/* no arguments */
+#define _SDT_ASM_TEMPLATE_1		_SDT_ARGFMT(1)
+#define _SDT_ASM_TEMPLATE_2		_SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2)
+#define _SDT_ASM_TEMPLATE_3		_SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3)
+#define _SDT_ASM_TEMPLATE_4		_SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4)
+#define _SDT_ASM_TEMPLATE_5		_SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5)
+#define _SDT_ASM_TEMPLATE_6		_SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6)
+#define _SDT_ASM_TEMPLATE_7		_SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7)
+#define _SDT_ASM_TEMPLATE_8		_SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8)
+#define _SDT_ASM_TEMPLATE_9		_SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9)
+#define _SDT_ASM_TEMPLATE_10		_SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10)
+#define _SDT_ASM_TEMPLATE_11		_SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11)
+#define _SDT_ASM_TEMPLATE_12		_SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12)
+#define _SDT_ASM_OPERANDS_0()		[__sdt_dummy] "g" (0)
+#define _SDT_ASM_OPERANDS_1(arg1)	_SDT_ARG(1, arg1)
+#define _SDT_ASM_OPERANDS_2(arg1, arg2) \
+  _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2)
+#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \
+  _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3)
+#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \
+  _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4)
+#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \
+  _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5)
+#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+  _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6)
+#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+  _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7)
+#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
+  _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \
+    _SDT_ARG(8, arg8)
+#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \
+  _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \
+    _SDT_ARG(9, arg9)
+#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+  _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \
+    _SDT_ARG(10, arg10)
+#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+  _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \
+    _SDT_ARG(11, arg11)
+#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+  _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \
+    _SDT_ARG(12, arg12)
+
+/* These macros can be used in C, C++, or assembly code.
+   In assembly code the arguments should use normal assembly operand syntax.  */
+
+#define STAP_PROBE(provider, name) \
+  _SDT_PROBE(provider, name, 0, ())
+#define STAP_PROBE1(provider, name, arg1) \
+  _SDT_PROBE(provider, name, 1, (arg1))
+#define STAP_PROBE2(provider, name, arg1, arg2) \
+  _SDT_PROBE(provider, name, 2, (arg1, arg2))
+#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \
+  _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3))
+#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \
+  _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4))
+#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \
+  _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5))
+#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6)	\
+  _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6))
+#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+  _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7))
+#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \
+  _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8))
+#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\
+  _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
+#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+  _SDT_PROBE(provider, name, 10, \
+	     (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10))
+#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+  _SDT_PROBE(provider, name, 11, \
+	     (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11))
+#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+  _SDT_PROBE(provider, name, 12, \
+	     (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12))
+
+/* This STAP_PROBEV macro can be used in variadic scenarios, where the
+   number of probe arguments is not known until compile time.  Since
+   variadic macro support may vary with compiler options, you must
+   pre-#define SDT_USE_VARIADIC to enable this type of probe.
+
+   The trick to count __VA_ARGS__ was inspired by this post by
+   Laurent Deniau <laurent.deniau@cern.ch>:
+       http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5
+
+   Note that our _SDT_NARG is called with an extra 0 arg that's not
+   counted, so we don't have to worry about the behavior of macros
+   called without any arguments.  */
+
+#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0) /* (argument count - 1), up to 12 */
+#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N /* selects the 14th argument */
+#ifdef SDT_USE_VARIADIC /* STAP_PROBEV exists only when the user pre-defines this */
+#define _SDT_PROBE_N(provider, name, N, ...) \
+  _SDT_PROBE(provider, name, N, (__VA_ARGS__))
+#define STAP_PROBEV(provider, name, ...) \
+  _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__)
+#endif /* SDT_USE_VARIADIC */
+
+/* These macros are for use in asm statements.  You must compile
+   with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro.
+
+   The STAP_PROBE_ASM macro generates a quoted string to be used in the
+   template portion of the asm statement, concatenated with strings that
+   contain the actual assembly code around the probe site.
+
+   For example:
+
+	asm ("before\n"
+	     STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi))
+	     "after");
+
+   emits the assembly code for "before\nafter", with a probe in between.
+   The probe arguments are the %eax register, and the value of the memory
+   word located 4 bytes past the address in the %esi register.  Note that
+   because this is a simple asm, not a GNU C extended asm statement, these
+   % characters do not need to be doubled to generate literal %reg names.
+
+   In a GNU C extended asm statement, the probe arguments can be specified
+   using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments.  The paired
+   macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments,
+   and appears in the input operand list of the asm statement.  For example:
+
+	asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand
+	     STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3))
+	     "otherinsn %[namedarg]"
+	     : "=r" (outvar)
+	     : "g" (some_value), [namedarg] "i" (1234),
+	       STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234));
+
+    This is just like writing:
+
+	STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234);
+
+    but the probe site is right between "someinsn" and "otherinsn".
+
+    The probe arguments in STAP_PROBE_ASM can be given as assembly
+    operands instead, even inside a GNU C extended asm statement.
+    Note that these can use operand templates like %0 or %[name],
+    and likewise they must write %%reg for a literal operand of %reg.  */
+
+#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__))
+#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__)
+#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__)
+#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__)
+#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__)
+
+#if __STDC_VERSION__ >= 199901L /* C99 or later: variadic forms */
+# define STAP_PROBE_ASM(provider, name, ...)		\
+  _SDT_ASM_BODY_N(provider, name, __VA_ARGS__)					\
+  _SDT_ASM_BASE
+# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__)
+#else /* older standard: single 'args' parameter; STAP_PROBE_ASM_OPERANDS is not defined here */
+# define STAP_PROBE_ASM(provider, name, args)	\
+  _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args))	\
+  _SDT_ASM_BASE
+#endif /* __STDC_VERSION__ >= 199901L */
+#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_"
+
+
+/* DTrace compatible macro names.  */
+#define DTRACE_PROBE(provider,probe)		\
+  STAP_PROBE(provider,probe)
+#define DTRACE_PROBE1(provider,probe,parm1)	\
+  STAP_PROBE1(provider,probe,parm1)
+#define DTRACE_PROBE2(provider,probe,parm1,parm2)	\
+  STAP_PROBE2(provider,probe,parm1,parm2)
+#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \
+  STAP_PROBE3(provider,probe,parm1,parm2,parm3)
+#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4)	\
+  STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4)
+#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)	\
+  STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)
+#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \
+  STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6)
+#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \
+  STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7)
+#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \
+  STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8)
+#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \
+  STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9)
+#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \
+  STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10)
+#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \
+  STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11)
+#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \
+  STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12)
+
+
+#endif /* sys/sdt.h */
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 6bf21e47882a..c0e7acd698ed 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -180,7 +180,7 @@ class FileExtractor(object):
         @enum_name: name of the enum to parse
         """
         start_marker = re.compile(f'enum {enum_name} {{\n')
-        pattern = re.compile('^\s*(BPF_\w+),?$')
+        pattern = re.compile(r'^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')  # raw string: same regex, no invalid-escape warnings
         end_marker = re.compile('^};')
         parser = BlockParser(self.reader)
         parser.search_block(start_marker)
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index d6a1be4d8020..0861ea60dcdd 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -6,7 +6,7 @@
 #include <stdlib.h>
 #include <sys/sysinfo.h>
 
-#include "bpf_rlimit.h"
+#include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
 
@@ -44,13 +44,16 @@ int main(int argc, char **argv)
 	unsigned long long *percpu_value;
 	int cpu, nproc;
 
-	nproc = get_nprocs_conf();
+	nproc = bpf_num_possible_cpus();
 	percpu_value = malloc(sizeof(*percpu_value) * nproc);
 	if (!percpu_value) {
 		printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
 		goto err;
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key),
 				sizeof(value), 0, NULL);
 	if (map_fd < 0) {
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index c299d3452695..7886265846a0 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -15,7 +15,6 @@
 
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
-#include "bpf_rlimit.h"
 
 #define DEV_CGROUP_PROG "./dev_cgroup.o"
 
@@ -28,6 +27,9 @@ int main(int argc, char **argv)
 	int prog_fd, cgroup_fd;
 	__u32 prog_cnt;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
 			  &obj, &prog_fd)) {
 		printf("Failed to load DEV_CGROUP program\n");
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index aa294612e0a7..c028d621c744 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -26,7 +26,6 @@
 #include <bpf/bpf.h>
 
 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 
 struct tlpm_node {
 	struct tlpm_node *next;
@@ -409,16 +408,13 @@ static void test_lpm_ipaddr(void)
 
 	/* Test some lookups that should not match any entry */
 	inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
 
 	inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
 
 	inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
-	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT);
 
 	close(map_fd_ipv4);
 	close(map_fd_ipv6);
@@ -475,18 +471,15 @@ static void test_lpm_delete(void)
 	/* remove non-existent node */
 	key->prefixlen = 32;
 	inet_pton(AF_INET, "10.0.0.1", key->data);
-	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
 
 	key->prefixlen = 30; // unused prefix so far
 	inet_pton(AF_INET, "192.255.0.0", key->data);
-	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
 
 	key->prefixlen = 16; // same prefix as the root node
 	inet_pton(AF_INET, "192.255.0.0", key->data);
-	assert(bpf_map_delete_elem(map_fd, key) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
 
 	/* assert initial lookup */
 	key->prefixlen = 32;
@@ -531,8 +524,7 @@ static void test_lpm_delete(void)
 
 	key->prefixlen = 32;
 	inet_pton(AF_INET, "192.168.128.1", key->data);
-	assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
-		errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
 
 	close(map_fd);
 }
@@ -553,8 +545,7 @@ static void test_lpm_get_next_key(void)
 	assert(map_fd >= 0);
 
 	/* empty tree. get_next_key should return ENOENT */
-	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT);
 
 	/* get and verify the first key, get the second one should fail. */
 	key_p->prefixlen = 16;
@@ -566,8 +557,7 @@ static void test_lpm_get_next_key(void)
 	assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
 	       key_p->data[1] == 168);
 
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* no exact matching key should get the first one in post order. */
 	key_p->prefixlen = 8;
@@ -591,8 +581,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* Add one more element (total three) */
 	key_p->prefixlen = 24;
@@ -615,8 +604,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* Add one more element (total four) */
 	key_p->prefixlen = 24;
@@ -644,8 +632,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* Add one more element (total five) */
 	key_p->prefixlen = 28;
@@ -679,8 +666,7 @@ static void test_lpm_get_next_key(void)
 	       next_key_p->data[1] == 168);
 
 	memcpy(key_p, next_key_p, key_size);
-	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
 
 	/* no exact matching key should return the first one in post order */
 	key_p->prefixlen = 22;
@@ -791,6 +777,9 @@ int main(void)
 	/* we want predictable, pseudo random tests */
 	srand(0xf00ba1);
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	test_lpm_basic();
 	test_lpm_order();
 
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 563bbe18c172..4d0650cfb5cd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -18,7 +18,6 @@
 #include <bpf/libbpf.h>
 
 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 #include "../../../include/linux/filter.h"
 
 #define LOCAL_FREE_TARGET	(128)
@@ -176,24 +175,20 @@ static void test_lru_sanity0(int map_type, int map_flags)
 				    BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */
 
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
-	       errno == EINVAL);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL);
 
 	/* insert key=2 element */
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */
 
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
@@ -201,8 +196,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
@@ -218,8 +212,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
 
 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* lookup elem key=1 and delete it, then check it doesn't exist */
 	key = 1;
@@ -382,8 +375,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
 	end_key = 1 + batch_size;
 	value[0] = 4321;
 	for (key = 1; key < end_key; key++) {
-		assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-		       errno == ENOENT);
+		assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 		assert(!bpf_map_update_elem(lru_map_fd, &key, value,
 					    BPF_NOEXIST));
 		assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -563,8 +555,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
 	assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value));
 
 	/* Cannot find the last key because it was removed by LRU */
-	assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT);
 }
 
 /* Test map with only one element */
@@ -712,21 +703,18 @@ static void test_lru_sanity7(int map_type, int map_flags)
 				    BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */
 
 	/* insert key=2 element */
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */
 
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
@@ -734,8 +722,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* check that key=1 can be found and mark the ref bit to
 	 * stop LRU from removing key=1
@@ -758,8 +745,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
 
 	/* key=2 has been removed from the LRU */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
@@ -806,21 +792,18 @@ static void test_lru_sanity8(int map_type, int map_flags)
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 
 	/* BPF_NOEXIST means: add new element if it doesn't exist */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
-	       /* key=1 already exists */
-	       && errno == EEXIST);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+	/* key=1 already exists */
 
 	/* insert key=2 element */
 
 	/* check that key=2 is not found */
 	key = 2;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* BPF_EXIST means: update existing element */
-	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
-	       /* key=2 is not there */
-	       errno == ENOENT);
+	assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+	/* key=2 is not there */
 
 	assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
 	assert(!bpf_map_update_elem(expected_map_fd, &key, value,
@@ -830,8 +813,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
 
 	/* check that key=3 is not found */
 	key = 3;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	/* check that key=1 can be found and do _not_ mark ref bit.
 	 * this will be evicted on next update.
@@ -854,8 +836,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
 
 	/* key=1 has been removed from the LRU */
 	key = 1;
-	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
-	       errno == ENOENT);
+	assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
 
 	assert(map_equal(lru_map_fd, expected_map_fd));
 
@@ -878,6 +859,9 @@ int main(int argc, char **argv)
 	assert(nr_cpus != -1);
 	printf("nr_cpus:%d\n\n", nr_cpus);
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
 		unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
 			PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index edaffd43da83..6cd6ef9fc20b 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -184,7 +184,7 @@ def bpftool_prog_list(expected=None, ns=""):
 def bpftool_map_list(expected=None, ns=""):
     _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
     # Remove the base maps
-    maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names]
+    maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
     if expected is not None:
         if len(maps) != expected:
             fail(True, "%d BPF maps loaded, expected %d" %
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 2ecb73a65206..c639f2e56fc5 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -3,6 +3,7 @@
  */
 #define _GNU_SOURCE
 #include "test_progs.h"
+#include "testing_helpers.h"
 #include "cgroup_helpers.h"
 #include <argp.h>
 #include <pthread.h>
@@ -17,6 +18,93 @@
 #include <sys/socket.h>
 #include <sys/un.h>
 
+static bool verbose(void)
+{
+	return env.verbosity > VERBOSE_NONE;	/* true for any level above VERBOSE_NONE */
+}
+
+static void stdio_hijack_init(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__ /* body assigns to stdout/stderr; compiled to a no-op otherwise */
+	if (verbose() && env.worker_id == -1) {
+		/* verbose and worker_id is -1: leave stdout/stderr as they are */
+		return;
+	}
+
+	fflush(stdout);	/* drain pending output before the streams are swapped */
+	fflush(stderr);
+
+	stdout = open_memstream(log_buf, log_cnt);	/* capture into *log_buf / *log_cnt */
+	if (!stdout) {
+		stdout = env.stdout;	/* fall back to the stream saved in env */
+		perror("open_memstream");
+		return;
+	}
+
+	if (env.subtest_state)	/* remember the stream: subtest state if set, else test state */
+		env.subtest_state->stdout = stdout;
+	else
+		env.test_state->stdout = stdout;
+
+	stderr = stdout;	/* stderr shares the capture stream */
+#endif /* __GLIBC__ */
+}
+
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__ /* body assigns to stdout/stderr; compiled to a no-op otherwise */
+	if (verbose() && env.worker_id == -1) {
+		/* verbose and worker_id is -1: leave stdout/stderr as they are */
+		return;
+	}
+
+	env.stdout = stdout;	/* save current streams; stdio_restore() assigns them back */
+	env.stderr = stderr;
+
+	stdio_hijack_init(log_buf, log_cnt);	/* then swap in the memstream */
+#endif /* __GLIBC__ */
+}
+
+static void stdio_restore_cleanup(void)
+{
+#ifdef __GLIBC__ /* body assigns to stdout/stderr; compiled to a no-op otherwise */
+	if (verbose() && env.worker_id == -1) {
+		/* verbose and worker_id is -1: streams were never swapped */
+		return;
+	}
+
+	fflush(stdout);	/* flush before the stream is closed below */
+
+	if (env.subtest_state) {	/* close the subtest stream, resume the test-level one */
+		fclose(env.subtest_state->stdout);
+		env.subtest_state->stdout = NULL;
+		stdout = env.test_state->stdout;
+		stderr = env.test_state->stdout;
+	} else {	/* close the test stream; stdout/stderr are not reassigned in this branch */
+		fclose(env.test_state->stdout);
+		env.test_state->stdout = NULL;
+	}
+#endif /* __GLIBC__ */
+}
+
+static void stdio_restore(void)
+{
+#ifdef __GLIBC__ /* body assigns to stdout/stderr; compiled to a no-op otherwise */
+	if (verbose() && env.worker_id == -1) {
+		/* verbose and worker_id is -1: streams were never swapped */
+		return;
+	}
+
+	if (stdout == env.stdout)	/* stdout already is the saved stream: nothing to undo */
+		return;
+
+	stdio_restore_cleanup();	/* flush and close the capture stream first */
+
+	stdout = env.stdout;	/* then assign back the streams saved in env */
+	stderr = env.stderr;
+#endif /* __GLIBC__ */
+}
+
 /* Adapted from perf/util/string.c */
 static bool glob_match(const char *str, const char *pat)
 {
@@ -50,19 +138,8 @@ struct prog_test_def {
 	int test_num;
 	void (*run_test)(void);
 	void (*run_serial_test)(void);
-	bool force_log;
-	int error_cnt;
-	int skip_cnt;
-	int sub_succ_cnt;
 	bool should_run;
-	bool tested;
 	bool need_cgroup_cleanup;
-
-	char *subtest_name;
-	int subtest_num;
-
-	/* store counts before subtest started */
-	int old_error_cnt;
 };
 
 /* Override C runtime library's usleep() implementation to ensure nanosleep()
@@ -84,12 +161,13 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
 	int i;
 
 	for (i = 0; i < sel->blacklist.cnt; i++) {
-		if (glob_match(name, sel->blacklist.strs[i]))
+		if (glob_match(name, sel->blacklist.tests[i].name) &&
+		    !sel->blacklist.tests[i].subtest_cnt)
 			return false;
 	}
 
 	for (i = 0; i < sel->whitelist.cnt; i++) {
-		if (glob_match(name, sel->whitelist.strs[i]))
+		if (glob_match(name, sel->whitelist.tests[i].name))
 			return true;
 	}
 
@@ -99,33 +177,138 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
 	return num < sel->num_set_len && sel->num_set[num];
 }
 
-static void dump_test_log(const struct prog_test_def *test, bool failed)
+static bool should_run_subtest(struct test_selector *sel,
+			       struct test_selector *subtest_sel,
+			       int subtest_num,
+			       const char *test_name,
+			       const char *subtest_name)
 {
-	if (stdout == env.stdout)
-		return;
+	int i, j;
 
-	/* worker always holds log */
-	if (env.worker_id != -1)
-		return;
+	for (i = 0; i < sel->blacklist.cnt; i++) {
+		if (glob_match(test_name, sel->blacklist.tests[i].name)) {
+			if (!sel->blacklist.tests[i].subtest_cnt)
+				return false;
+
+			for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) {
+				if (glob_match(subtest_name,
+					       sel->blacklist.tests[i].subtests[j]))
+					return false;
+			}
+		}
+	}
 
-	fflush(stdout); /* exports env.log_buf & env.log_cnt */
+	for (i = 0; i < sel->whitelist.cnt; i++) {
+		if (glob_match(test_name, sel->whitelist.tests[i].name)) {
+			if (!sel->whitelist.tests[i].subtest_cnt)
+				return true;
 
-	if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
-		if (env.log_cnt) {
-			env.log_buf[env.log_cnt] = '\0';
-			fprintf(env.stdout, "%s", env.log_buf);
-			if (env.log_buf[env.log_cnt - 1] != '\n')
-				fprintf(env.stdout, "\n");
+			for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) {
+				if (glob_match(subtest_name,
+					       sel->whitelist.tests[i].subtests[j]))
+					return true;
+			}
 		}
 	}
+
+	if (!sel->whitelist.cnt && !subtest_sel->num_set)
+		return true;
+
+	return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
+}
+
+static char *test_result(bool failed, bool skipped)
+{
+	return failed ? "FAIL" : (skipped ? "SKIP" : "OK");
 }
 
-static void skip_account(void)
+static void print_test_log(char *log_buf, size_t log_cnt)
 {
-	if (env.test->skip_cnt) {
-		env.skip_cnt++;
-		env.test->skip_cnt = 0;
+	log_buf[log_cnt] = '\0';
+	fprintf(env.stdout, "%s", log_buf);
+	if (log_buf[log_cnt - 1] != '\n')
+		fprintf(env.stdout, "\n");
+}
+
+#define TEST_NUM_WIDTH 7
+
+/* Print "#<num> <name>[:<result>]" and a newline to env.stdout; result may be NULL. */
+static void print_test_name(int test_num, const char *test_name, char *result)
+{
+	fprintf(env.stdout, "#%-*d %s", TEST_NUM_WIDTH, test_num, test_name);
+	if (result)
+		fprintf(env.stdout, ":%s", result);
+
+	fprintf(env.stdout, "\n");
+}
+
+/* Print "#<test>/<subtest> <test_name>/<subtest_name>[:<result>]" plus newline. */
+static void print_subtest_name(int test_num, int subtest_num,
+			       const char *test_name, char *subtest_name,
+			       char *result)
+{
+	char num_label[TEST_NUM_WIDTH + 1];
+
+	/* snprintf() truncates labels longer than TEST_NUM_WIDTH characters */
+	snprintf(num_label, sizeof(num_label), "%d/%d", test_num, subtest_num);
+
+	fprintf(env.stdout, "#%-*s %s/%s", TEST_NUM_WIDTH, num_label,
+		test_name, subtest_name);
+	if (result)
+		fprintf(env.stdout, ":%s", result);
+
+	fprintf(env.stdout, "\n");
+}
+
+static void dump_test_log(const struct prog_test_def *test,
+			  const struct test_state *test_state,
+			  bool skip_ok_subtests,
+			  bool par_exec_result)
+{
+	bool test_failed = test_state->error_cnt > 0;
+	bool force_log = test_state->force_log;
+	bool print_test = verbose() || force_log || test_failed;
+	int i;
+	struct subtest_state *subtest_state;
+	bool subtest_failed;
+	bool subtest_filtered;
+	bool print_subtest;
+
+	/* we do not print anything in the worker thread */
+	if (env.worker_id != -1)
+		return;
+
+	/* there is nothing to print when verbose log is used and execution
+	 * is not in parallel mode
+	 */
+	if (verbose() && !par_exec_result)
+		return;
+
+	if (test_state->log_cnt && print_test)
+		print_test_log(test_state->log_buf, test_state->log_cnt);
+
+	for (i = 0; i < test_state->subtest_num; i++) {
+		subtest_state = &test_state->subtest_states[i];
+		subtest_failed = subtest_state->error_cnt;
+		subtest_filtered = subtest_state->filtered;
+		print_subtest = verbose() || force_log || subtest_failed;
+
+		if ((skip_ok_subtests && !subtest_failed) || subtest_filtered)
+			continue;
+
+		if (subtest_state->log_cnt && print_subtest) {
+			print_test_log(subtest_state->log_buf,
+				       subtest_state->log_cnt);
+		}
+
+		print_subtest_name(test->test_num, i + 1,
+				   test->test_name, subtest_state->name,
+				   test_result(subtest_state->error_cnt,
+					       subtest_state->skipped));
 	}
+
+	print_test_name(test->test_num, test->test_name,
+			test_result(test_failed, test_state->skip_cnt));
 }
 
 static void stdio_restore(void);
@@ -135,7 +318,6 @@ static void stdio_restore(void);
  */
 static void reset_affinity(void)
 {
-
 	cpu_set_t cpuset;
 	int i, err;
 
@@ -178,68 +360,100 @@ static void restore_netns(void)
 void test__end_subtest(void)
 {
 	struct prog_test_def *test = env.test;
-	int sub_error_cnt = test->error_cnt - test->old_error_cnt;
-
-	dump_test_log(test, sub_error_cnt);
-
-	fprintf(stdout, "#%d/%d %s/%s:%s\n",
-	       test->test_num, test->subtest_num, test->test_name, test->subtest_name,
-	       sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+	struct test_state *test_state = env.test_state;
+	struct subtest_state *subtest_state = env.subtest_state;
+
+	if (subtest_state->error_cnt) {
+		test_state->error_cnt++;
+	} else {
+		if (!subtest_state->skipped)
+			test_state->sub_succ_cnt++;
+		else
+			test_state->skip_cnt++;
+	}
 
-	if (sub_error_cnt)
-		test->error_cnt++;
-	else if (test->skip_cnt == 0)
-		test->sub_succ_cnt++;
-	skip_account();
+	if (verbose() && !env.workers)
+		print_subtest_name(test->test_num, test_state->subtest_num,
+				   test->test_name, subtest_state->name,
+				   test_result(subtest_state->error_cnt,
+					       subtest_state->skipped));
 
-	free(test->subtest_name);
-	test->subtest_name = NULL;
+	stdio_restore_cleanup();
+	env.subtest_state = NULL;
 }
 
-bool test__start_subtest(const char *name)
+bool test__start_subtest(const char *subtest_name)
 {
 	struct prog_test_def *test = env.test;
+	struct test_state *state = env.test_state;
+	struct subtest_state *subtest_state;
+	size_t sub_state_size = sizeof(*subtest_state);
 
-	if (test->subtest_name)
+	if (env.subtest_state)
 		test__end_subtest();
 
-	test->subtest_num++;
+	state->subtest_num++;
+	state->subtest_states =
+		realloc(state->subtest_states,
+			state->subtest_num * sub_state_size);
+	if (!state->subtest_states) {
+		fprintf(stderr, "Not enough memory to allocate subtest result\n");
+		return false;
+	}
+
+	subtest_state = &state->subtest_states[state->subtest_num - 1];
 
-	if (!name || !name[0]) {
+	memset(subtest_state, 0, sub_state_size);
+
+	if (!subtest_name || !subtest_name[0]) {
 		fprintf(env.stderr,
 			"Subtest #%d didn't provide sub-test name!\n",
-			test->subtest_num);
+			state->subtest_num);
 		return false;
 	}
 
-	if (!should_run(&env.subtest_selector, test->subtest_num, name))
-		return false;
-
-	test->subtest_name = strdup(name);
-	if (!test->subtest_name) {
+	subtest_state->name = strdup(subtest_name);
+	if (!subtest_state->name) {
 		fprintf(env.stderr,
 			"Subtest #%d: failed to copy subtest name!\n",
-			test->subtest_num);
+			state->subtest_num);
+		return false;
+	}
+
+	if (!should_run_subtest(&env.test_selector,
+				&env.subtest_selector,
+				state->subtest_num,
+				test->test_name,
+				subtest_name)) {
+		subtest_state->filtered = true;
 		return false;
 	}
-	env.test->old_error_cnt = env.test->error_cnt;
+
+	env.subtest_state = subtest_state;
+	stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt);
 
 	return true;
 }
 
 void test__force_log(void)
 {
-	env.test->force_log = true;
+	env.test_state->force_log = true;
 }
 
 void test__skip(void)
 {
-	env.test->skip_cnt++;
+	if (env.subtest_state)
+		env.subtest_state->skipped = true;
+	else
+		env.test_state->skip_cnt++;
 }
 
 void test__fail(void)
 {
-	env.test->error_cnt++;
+	if (env.subtest_state)
+		env.subtest_state->error_cnt++;
+	else
+		env.test_state->error_cnt++;
 }
 
 int test__join_cgroup(const char *path)
@@ -418,14 +632,14 @@ static void unload_bpf_testmod(void)
 		fprintf(env.stderr, "Failed to trigger kernel-side RCU sync!\n");
 	if (delete_module("bpf_testmod", 0)) {
 		if (errno == ENOENT) {
-			if (env.verbosity > VERBOSE_NONE)
+			if (verbose())
 				fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
 			return;
 		}
 		fprintf(env.stderr, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
 		return;
 	}
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
 }
 
@@ -436,7 +650,7 @@ static int load_bpf_testmod(void)
 	/* ensure previous instance of the module is unloaded */
 	unload_bpf_testmod();
 
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout, "Loading bpf_testmod.ko...\n");
 
 	fd = open("bpf_testmod.ko", O_RDONLY);
@@ -451,7 +665,7 @@ static int load_bpf_testmod(void)
 	}
 	close(fd);
 
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
 	return 0;
 }
@@ -472,8 +686,11 @@ static struct prog_test_def prog_test_defs[] = {
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 };
+
 static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
 
+static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
+
 const char *argp_program_version = "test_progs 0.1";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 static const char argp_program_doc[] = "BPF selftests test runner";
@@ -527,63 +744,29 @@ static int libbpf_print_fn(enum libbpf_print_level level,
 	return 0;
 }
 
-static void free_str_set(const struct str_set *set)
+static void free_test_filter_set(const struct test_filter_set *set)
 {
-	int i;
+	int i, j;
 
 	if (!set)
 		return;
 
-	for (i = 0; i < set->cnt; i++)
-		free((void *)set->strs[i]);
-	free(set->strs);
-}
-
-static int parse_str_list(const char *s, struct str_set *set, bool is_glob_pattern)
-{
-	char *input, *state = NULL, *next, **tmp, **strs = NULL;
-	int i, cnt = 0;
+	for (i = 0; i < set->cnt; i++) {
+		free((void *)set->tests[i].name);
+		for (j = 0; j < set->tests[i].subtest_cnt; j++)
+			free((void *)set->tests[i].subtests[j]);
 
-	input = strdup(s);
-	if (!input)
-		return -ENOMEM;
-
-	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
-		tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
-		if (!tmp)
-			goto err;
-		strs = tmp;
-
-		if (is_glob_pattern) {
-			strs[cnt] = strdup(next);
-			if (!strs[cnt])
-				goto err;
-		} else {
-			strs[cnt] = malloc(strlen(next) + 2 + 1);
-			if (!strs[cnt])
-				goto err;
-			sprintf(strs[cnt], "*%s*", next);
-		}
-
-		cnt++;
+		free((void *)set->tests[i].subtests);
 	}
 
-	tmp = realloc(set->strs, sizeof(*strs) * (cnt + set->cnt));
-	if (!tmp)
-		goto err;
-	memcpy(tmp + set->cnt, strs, sizeof(*strs) * cnt);
-	set->strs = (const char **)tmp;
-	set->cnt += cnt;
+	free((void *)set->tests);
+}
 
-	free(input);
-	free(strs);
-	return 0;
-err:
-	for (i = 0; i < cnt; i++)
-		free(strs[i]);
-	free(strs);
-	free(input);
-	return -ENOMEM;
+static void free_test_selector(struct test_selector *test_selector)
+{
+	free_test_filter_set(&test_selector->blacklist);
+	free_test_filter_set(&test_selector->whitelist);
+	free(test_selector->num_set);
 }
 
 extern int extra_prog_load_log_flags;
@@ -615,33 +798,17 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	}
 	case ARG_TEST_NAME_GLOB_ALLOWLIST:
 	case ARG_TEST_NAME: {
-		char *subtest_str = strchr(arg, '/');
-
-		if (subtest_str) {
-			*subtest_str = '\0';
-			if (parse_str_list(subtest_str + 1,
-					   &env->subtest_selector.whitelist,
-					   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
-				return -ENOMEM;
-		}
-		if (parse_str_list(arg, &env->test_selector.whitelist,
-				   key == ARG_TEST_NAME_GLOB_ALLOWLIST))
+		if (parse_test_list(arg,
+				    &env->test_selector.whitelist,
+				    key == ARG_TEST_NAME_GLOB_ALLOWLIST))
 			return -ENOMEM;
 		break;
 	}
 	case ARG_TEST_NAME_GLOB_DENYLIST:
 	case ARG_TEST_NAME_BLACKLIST: {
-		char *subtest_str = strchr(arg, '/');
-
-		if (subtest_str) {
-			*subtest_str = '\0';
-			if (parse_str_list(subtest_str + 1,
-					   &env->subtest_selector.blacklist,
-					   key == ARG_TEST_NAME_GLOB_DENYLIST))
-				return -ENOMEM;
-		}
-		if (parse_str_list(arg, &env->test_selector.blacklist,
-				   key == ARG_TEST_NAME_GLOB_DENYLIST))
+		if (parse_test_list(arg,
+				    &env->test_selector.blacklist,
+				    key == ARG_TEST_NAME_GLOB_DENYLIST))
 			return -ENOMEM;
 		break;
 	}
@@ -665,7 +832,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			}
 		}
 
-		if (env->verbosity > VERBOSE_NONE) {
+		if (verbose()) {
 			if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
 				fprintf(stderr,
 					"Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
@@ -706,44 +873,6 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
-static void stdio_hijack(void)
-{
-#ifdef __GLIBC__
-	env.stdout = stdout;
-	env.stderr = stderr;
-
-	if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) {
-		/* nothing to do, output to stdout by default */
-		return;
-	}
-
-	/* stdout and stderr -> buffer */
-	fflush(stdout);
-
-	stdout = open_memstream(&env.log_buf, &env.log_cnt);
-	if (!stdout) {
-		stdout = env.stdout;
-		perror("open_memstream");
-		return;
-	}
-
-	stderr = stdout;
-#endif
-}
-
-static void stdio_restore(void)
-{
-#ifdef __GLIBC__
-	if (stdout == env.stdout)
-		return;
-
-	fclose(stdout);
-
-	stdout = env.stdout;
-	stderr = env.stderr;
-#endif
-}
-
 /*
  * Determine if test_progs is running as a "flavored" test runner and switch
  * into corresponding sub-directory to load correct BPF objects.
@@ -761,13 +890,15 @@ int cd_flavor_subdir(const char *exec_name)
 	const char *flavor = strrchr(exec_name, '/');
 
 	if (!flavor)
-		return 0;
-	flavor++;
+		flavor = exec_name;
+	else
+		flavor++;
+
 	flavor = strrchr(flavor, '-');
 	if (!flavor)
 		return 0;
 	flavor++;
-	if (env.verbosity > VERBOSE_NONE)
+	if (verbose())
 		fprintf(stdout,	"Switching to flavor '%s' subdirectory...\n", flavor);
 
 	return chdir(flavor);
@@ -820,8 +951,10 @@ void crash_handler(int signum)
 
 	sz = backtrace(bt, ARRAY_SIZE(bt));
 
-	if (env.test)
-		dump_test_log(env.test, true);
+	if (env.test) {
+		env.test_state->error_cnt++;
+		dump_test_log(env.test, env.test_state, true, false);
+	}
 	if (env.stdout)
 		stdio_restore();
 	if (env.worker_id != -1)
@@ -843,28 +976,22 @@ static int current_test_idx;
 static pthread_mutex_t current_test_lock;
 static pthread_mutex_t stdout_output_lock;
 
-struct test_result {
-	int error_cnt;
-	int skip_cnt;
-	int sub_succ_cnt;
-
-	size_t log_cnt;
-	char *log_buf;
-};
-
-static struct test_result test_results[ARRAY_SIZE(prog_test_defs)];
-
 static inline const char *str_msg(const struct msg *msg, char *buf)
 {
 	switch (msg->type) {
 	case MSG_DO_TEST:
-		sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num);
+		sprintf(buf, "MSG_DO_TEST %d", msg->do_test.num);
 		break;
 	case MSG_TEST_DONE:
 		sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
-			msg->test_done.test_num,
+			msg->test_done.num,
 			msg->test_done.have_log);
 		break;
+	case MSG_SUBTEST_DONE:
+		sprintf(buf, "MSG_SUBTEST_DONE %d (log: %d)",
+			msg->subtest_done.num,
+			msg->subtest_done.have_log);
+		break;
 	case MSG_TEST_LOG:
 		sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)",
 			strlen(msg->test_log.log_buf),
@@ -907,8 +1034,12 @@ static int recv_message(int sock, struct msg *msg)
 static void run_one_test(int test_num)
 {
 	struct prog_test_def *test = &prog_test_defs[test_num];
+	struct test_state *state = &test_states[test_num];
 
 	env.test = test;
+	env.test_state = state;
+
+	stdio_hijack(&state->log_buf, &state->log_cnt);
 
 	if (test->run_test)
 		test->run_test();
@@ -916,17 +1047,23 @@ static void run_one_test(int test_num)
 		test->run_serial_test();
 
 	/* ensure last sub-test is finalized properly */
-	if (test->subtest_name)
+	if (env.subtest_state)
 		test__end_subtest();
 
-	test->tested = true;
+	state->tested = true;
 
-	dump_test_log(test, test->error_cnt);
+	if (verbose() && env.worker_id == -1)
+		print_test_name(test_num + 1, test->test_name,
+				test_result(state->error_cnt, state->skip_cnt));
 
 	reset_affinity();
 	restore_netns();
 	if (test->need_cgroup_cleanup)
 		cleanup_cgroup_environment();
+
+	stdio_restore();
+
+	dump_test_log(test, state, false, false);
 }
 
 struct dispatch_data {
@@ -934,18 +1071,90 @@ struct dispatch_data {
 	int sock_fd;
 };
 
+/* Receive one message into *msg; returns 0 only if its type equals @type. */
+static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type)
+{
+	if (recv_message(sock_fd, msg) < 0)
+		return 1;
+	if (msg->type == type)
+		return 0;
+
+	printf("%s: unexpected message type %d. expected %d\n",
+	       __func__, msg->type, type);
+	return 1;
+}
+
+/*
+ * Collect a log sent over sock_fd: append each MSG_TEST_LOG chunk to a
+ * memstream backed by log_buf/log_cnt, stopping after the chunk that is
+ * flagged is_last. Returns 0 on success, 1 on any receive/stream failure.
+ */
+static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt)
+{
+	FILE *stream = open_memstream(log_buf, log_cnt);
+	struct msg chunk;
+	int err = 0;
+
+	if (!stream)
+		return 1;
+
+	do {
+		if (read_prog_test_msg(sock_fd, &chunk, MSG_TEST_LOG)) {
+			err = 1;
+			break;
+		}
+
+		fprintf(stream, "%s", chunk.test_log.log_buf);
+	} while (!chunk.test_log.is_last);
+
+	/* the stream is closed on both the success and the failure path */
+	fclose(stream);
+	return err;
+}
+
+/* Read state->subtest_num MSG_SUBTEST_DONE results (plus logs) from sock_fd. */
+static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state)
+{
+	struct msg msg;
+	struct subtest_state *subtest_state;
+	int subtest_num = state->subtest_num;
+
+	/* zero-filled: entries not received before a failure stay safe to free */
+	state->subtest_states = calloc(subtest_num, sizeof(*subtest_state));
+	if (!state->subtest_states) {
+		state->subtest_num = 0;
+		return 1;
+	}
+
+	for (int i = 0; i < subtest_num; i++) {
+		subtest_state = &state->subtest_states[i];
+		if (read_prog_test_msg(sock_fd, &msg, MSG_SUBTEST_DONE))
+			return 1;
+		subtest_state->name = strdup(msg.subtest_done.name);
+		subtest_state->error_cnt = msg.subtest_done.error_cnt;
+		subtest_state->skipped = msg.subtest_done.skipped;
+		subtest_state->filtered = msg.subtest_done.filtered;
+		/* collect all logs */
+		if (msg.subtest_done.have_log)
+			if (dispatch_thread_read_log(sock_fd,
+						     &subtest_state->log_buf,
+						     &subtest_state->log_cnt))
+				return 1;
+	}
+	return 0;
+}
+
 static void *dispatch_thread(void *ctx)
 {
 	struct dispatch_data *data = ctx;
 	int sock_fd;
-	FILE *log_fp = NULL;
 
 	sock_fd = data->sock_fd;
 
 	while (true) {
 		int test_to_run = -1;
 		struct prog_test_def *test;
-		struct test_result *result;
+		struct test_state *state;
 
 		/* grab a test */
 		{
@@ -970,8 +1179,9 @@ static void *dispatch_thread(void *ctx)
 		{
 			struct msg msg_do_test;
 
+			memset(&msg_do_test, 0, sizeof(msg_do_test));
 			msg_do_test.type = MSG_DO_TEST;
-			msg_do_test.do_test.test_num = test_to_run;
+			msg_do_test.do_test.num = test_to_run;
 			if (send_message(sock_fd, &msg_do_test) < 0) {
 				perror("Fail to send command");
 				goto done;
@@ -980,72 +1190,45 @@ static void *dispatch_thread(void *ctx)
 		}
 
 		/* wait for test done */
-		{
-			int err;
-			struct msg msg_test_done;
+		do {
+			struct msg msg;
 
-			err = recv_message(sock_fd, &msg_test_done);
-			if (err < 0)
-				goto error;
-			if (msg_test_done.type != MSG_TEST_DONE)
+			if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_DONE))
 				goto error;
-			if (test_to_run != msg_test_done.test_done.test_num)
+			if (test_to_run != msg.test_done.num)
 				goto error;
 
-			test->tested = true;
-			result = &test_results[test_to_run];
-
-			result->error_cnt = msg_test_done.test_done.error_cnt;
-			result->skip_cnt = msg_test_done.test_done.skip_cnt;
-			result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
+			state = &test_states[test_to_run];
+			state->tested = true;
+			state->error_cnt = msg.test_done.error_cnt;
+			state->skip_cnt = msg.test_done.skip_cnt;
+			state->sub_succ_cnt = msg.test_done.sub_succ_cnt;
+			state->subtest_num = msg.test_done.subtest_num;
 
 			/* collect all logs */
-			if (msg_test_done.test_done.have_log) {
-				log_fp = open_memstream(&result->log_buf, &result->log_cnt);
-				if (!log_fp)
+			if (msg.test_done.have_log) {
+				if (dispatch_thread_read_log(sock_fd,
+							     &state->log_buf,
+							     &state->log_cnt))
 					goto error;
+			}
 
-				while (true) {
-					struct msg msg_log;
-
-					if (recv_message(sock_fd, &msg_log) < 0)
-						goto error;
-					if (msg_log.type != MSG_TEST_LOG)
-						goto error;
+			/* collect all subtests and subtest logs */
+			if (!state->subtest_num)
+				break;
 
-					fprintf(log_fp, "%s", msg_log.test_log.log_buf);
-					if (msg_log.test_log.is_last)
-						break;
-				}
-				fclose(log_fp);
-				log_fp = NULL;
-			}
-			/* output log */
-			{
-				pthread_mutex_lock(&stdout_output_lock);
-
-				if (result->log_cnt) {
-					result->log_buf[result->log_cnt] = '\0';
-					fprintf(stdout, "%s", result->log_buf);
-					if (result->log_buf[result->log_cnt - 1] != '\n')
-						fprintf(stdout, "\n");
-				}
-
-				fprintf(stdout, "#%d %s:%s\n",
-					test->test_num, test->test_name,
-					result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
-
-				pthread_mutex_unlock(&stdout_output_lock);
-			}
+			if (dispatch_thread_send_subtests(sock_fd, state))
+				goto error;
+		} while (false);
 
-		} /* wait for test done */
+		pthread_mutex_lock(&stdout_output_lock);
+		dump_test_log(test, state, false, true);
+		pthread_mutex_unlock(&stdout_output_lock);
 	} /* while (true) */
 error:
 	if (env.debug)
 		fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno));
 
-	if (log_fp)
-		fclose(log_fp);
 done:
 	{
 		struct msg msg_exit;
@@ -1060,38 +1243,56 @@ done:
 	return NULL;
 }
 
-static void print_all_error_logs(void)
+static void calculate_summary_and_print_errors(struct test_env *env)
 {
 	int i;
+	int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;
 
-	if (env.fail_cnt)
-		fprintf(stdout, "\nAll error logs:\n");
-
-	/* print error logs again */
 	for (i = 0; i < prog_test_cnt; i++) {
-		struct prog_test_def *test;
-		struct test_result *result;
-
-		test = &prog_test_defs[i];
-		result = &test_results[i];
+		struct test_state *state = &test_states[i];
 
-		if (!test->tested || !result->error_cnt)
+		if (!state->tested)
 			continue;
 
-		fprintf(stdout, "\n#%d %s:%s\n",
-			test->test_num, test->test_name,
-			result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
+		sub_succ_cnt += state->sub_succ_cnt;
+		skip_cnt += state->skip_cnt;
+
+		if (state->error_cnt)
+			fail_cnt++;
+		else
+			succ_cnt++;
+	}
 
-		if (result->log_cnt) {
-			result->log_buf[result->log_cnt] = '\0';
-			fprintf(stdout, "%s", result->log_buf);
-			if (result->log_buf[result->log_cnt - 1] != '\n')
-				fprintf(stdout, "\n");
+	/*
+	 * We only print error logs summary when there are failed tests and
+	 * verbose mode is not enabled. Otherwise, results may be inconsistent.
+	 *
+	 */
+	if (!verbose() && fail_cnt) {
+		printf("\nAll error logs:\n");
+
+		/* print error logs again */
+		for (i = 0; i < prog_test_cnt; i++) {
+			struct prog_test_def *test = &prog_test_defs[i];
+			struct test_state *state = &test_states[i];
+
+			if (!state->tested || !state->error_cnt)
+				continue;
+
+			dump_test_log(test, state, true, true);
 		}
 	}
+
+	printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+	       succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
+
+	env->succ_cnt = succ_cnt;
+	env->sub_succ_cnt = sub_succ_cnt;
+	env->fail_cnt = fail_cnt;
+	env->skip_cnt = skip_cnt;
 }
 
-static int server_main(void)
+static void server_main(void)
 {
 	pthread_t *dispatcher_threads;
 	struct dispatch_data *data;
@@ -1147,60 +1348,18 @@ static int server_main(void)
 
 	for (int i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
-		struct test_result *result = &test_results[i];
 
 		if (!test->should_run || !test->run_serial_test)
 			continue;
 
-		stdio_hijack();
-
 		run_one_test(i);
-
-		stdio_restore();
-		if (env.log_buf) {
-			result->log_cnt = env.log_cnt;
-			result->log_buf = strdup(env.log_buf);
-
-			free(env.log_buf);
-			env.log_buf = NULL;
-			env.log_cnt = 0;
-		}
-		restore_netns();
-
-		fprintf(stdout, "#%d %s:%s\n",
-			test->test_num, test->test_name,
-			test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
-		result->error_cnt = test->error_cnt;
-		result->skip_cnt = test->skip_cnt;
-		result->sub_succ_cnt = test->sub_succ_cnt;
 	}
 
 	/* generate summary */
 	fflush(stderr);
 	fflush(stdout);
 
-	for (i = 0; i < prog_test_cnt; i++) {
-		struct prog_test_def *current_test;
-		struct test_result *result;
-
-		current_test = &prog_test_defs[i];
-		result = &test_results[i];
-
-		if (!current_test->tested)
-			continue;
-
-		env.succ_cnt += result->error_cnt ? 0 : 1;
-		env.skip_cnt += result->skip_cnt;
-		if (result->error_cnt)
-			env.fail_cnt++;
-		env.sub_succ_cnt += result->sub_succ_cnt;
-	}
-
-	print_all_error_logs();
-
-	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
-		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+	calculate_summary_and_print_errors(&env);
 
 	/* reap all workers */
 	for (i = 0; i < env.workers; i++) {
@@ -1210,8 +1369,91 @@ static int server_main(void)
 		if (pid != env.worker_pids[i])
 			perror("Unable to reap worker");
 	}
+}
 
-	return 0;
+/* Send log_buf over sock as MSG_TEST_LOG chunks of at most MAX_LOG_TRUNK_SIZE. */
+static void worker_main_send_log(int sock, char *log_buf, size_t log_cnt)
+{
+	char *src = log_buf;
+	size_t slen = log_cnt;
+
+	while (slen) {
+		struct msg msg_log;
+		size_t len;
+		int err;
+
+		memset(&msg_log, 0, sizeof(msg_log));
+		msg_log.type = MSG_TEST_LOG;
+		len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
+		memcpy(msg_log.test_log.log_buf, src, len);
+
+		src += len;
+		slen -= len;
+		if (!slen)
+			msg_log.test_log.is_last = true;
+
+		/* call outside assert() so an NDEBUG build still sends the chunk */
+		err = send_message(sock, &msg_log);
+		assert(err >= 0);
+	}
+}
+
+static void free_subtest_state(struct subtest_state *state)
+{
+	free(state->name);
+	state->name = NULL;
+	if (!state->log_buf)
+		return;	/* no captured log to release */
+	free(state->log_buf);
+	state->log_buf = NULL;
+	state->log_cnt = 0;
+}
+
+/* Send one MSG_SUBTEST_DONE (plus its log, if any) per subtest, then free them. */
+static int worker_main_send_subtests(int sock, struct test_state *state)
+{
+	int i, result = 0;
+	struct msg msg;
+	struct subtest_state *subtest_state;
+	const char *name;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_SUBTEST_DONE;
+
+	for (i = 0; i < state->subtest_num; i++) {
+		subtest_state = &state->subtest_states[i];
+		msg.subtest_done.num = i;
+
+		/* name is NULL when the subtest was started without a usable name */
+		name = subtest_state->name ? subtest_state->name : "";
+		strncpy(msg.subtest_done.name, name, MAX_SUBTEST_NAME);
+		msg.subtest_done.error_cnt = subtest_state->error_cnt;
+		msg.subtest_done.skipped = subtest_state->skipped;
+		msg.subtest_done.filtered = subtest_state->filtered;
+		msg.subtest_done.have_log = false;
+
+		if (verbose() || state->force_log || subtest_state->error_cnt) {
+			if (subtest_state->log_cnt)
+				msg.subtest_done.have_log = true;
+		}
+
+		if (send_message(sock, &msg) < 0) {
+			perror("Fail to send message done");
+			result = 1;
+			goto out;
+		}
+
+		/* send logs */
+		if (msg.subtest_done.have_log)
+			worker_main_send_log(sock, subtest_state->log_buf, subtest_state->log_cnt);
+		free_subtest_state(subtest_state);
+	}
+
+out:
+	for (; i < state->subtest_num; i++)
+		free_subtest_state(&state->subtest_states[i]);
+	free(state->subtest_states);
+	return result;
 }
 
 static int worker_main(int sock)
@@ -1232,12 +1474,10 @@ static int worker_main(int sock)
 					env.worker_id);
 			goto out;
 		case MSG_DO_TEST: {
-			int test_to_run;
-			struct prog_test_def *test;
-			struct msg msg_done;
-
-			test_to_run = msg.do_test.test_num;
-			test = &prog_test_defs[test_to_run];
+			int test_to_run = msg.do_test.num;
+			struct prog_test_def *test = &prog_test_defs[test_to_run];
+			struct test_state *state = &test_states[test_to_run];
+			struct msg msg;
 
 			if (env.debug)
 				fprintf(stderr, "[%d]: #%d:%s running.\n",
@@ -1245,60 +1485,40 @@ static int worker_main(int sock)
 					test_to_run + 1,
 					test->test_name);
 
-			stdio_hijack();
-
 			run_one_test(test_to_run);
 
-			stdio_restore();
-
-			memset(&msg_done, 0, sizeof(msg_done));
-			msg_done.type = MSG_TEST_DONE;
-			msg_done.test_done.test_num = test_to_run;
-			msg_done.test_done.error_cnt = test->error_cnt;
-			msg_done.test_done.skip_cnt = test->skip_cnt;
-			msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt;
-			msg_done.test_done.have_log = false;
-
-			if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
-				if (env.log_cnt)
-					msg_done.test_done.have_log = true;
+			memset(&msg, 0, sizeof(msg));
+			msg.type = MSG_TEST_DONE;
+			msg.test_done.num = test_to_run;
+			msg.test_done.error_cnt = state->error_cnt;
+			msg.test_done.skip_cnt = state->skip_cnt;
+			msg.test_done.sub_succ_cnt = state->sub_succ_cnt;
+			msg.test_done.subtest_num = state->subtest_num;
+			msg.test_done.have_log = false;
+
+			if (verbose() || state->force_log || state->error_cnt) {
+				if (state->log_cnt)
+					msg.test_done.have_log = true;
 			}
-			if (send_message(sock, &msg_done) < 0) {
+			if (send_message(sock, &msg) < 0) {
 				perror("Fail to send message done");
 				goto out;
 			}
 
 			/* send logs */
-			if (msg_done.test_done.have_log) {
-				char *src;
-				size_t slen;
-
-				src = env.log_buf;
-				slen = env.log_cnt;
-				while (slen) {
-					struct msg msg_log;
-					char *dest;
-					size_t len;
-
-					memset(&msg_log, 0, sizeof(msg_log));
-					msg_log.type = MSG_TEST_LOG;
-					dest = msg_log.test_log.log_buf;
-					len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
-					memcpy(dest, src, len);
-
-					src += len;
-					slen -= len;
-					if (!slen)
-						msg_log.test_log.is_last = true;
-
-					assert(send_message(sock, &msg_log) >= 0);
-				}
-			}
-			if (env.log_buf) {
-				free(env.log_buf);
-				env.log_buf = NULL;
-				env.log_cnt = 0;
+			if (msg.test_done.have_log)
+				worker_main_send_log(sock, state->log_buf, state->log_cnt);
+
+			if (state->log_buf) {
+				free(state->log_buf);
+				state->log_buf = NULL;
+				state->log_cnt = 0;
 			}
+
+			if (state->subtest_num)
+				if (worker_main_send_subtests(sock, state))
+					goto out;
+
 			if (env.debug)
 				fprintf(stderr, "[%d]: #%d:%s done.\n",
 					env.worker_id,
@@ -1316,6 +1536,23 @@ out:
 	return 0;
 }
 
+/* Release every test's subtest states and log buffer, clearing the pointers. */
+static void free_test_states(void)
+{
+	struct test_state *ts;
+	int t, s;
+
+	for (t = 0; t < ARRAY_SIZE(prog_test_defs); t++) {
+		ts = &test_states[t];
+		for (s = 0; s < ts->subtest_num; s++)
+			free_subtest_state(&ts->subtest_states[s]);
+		free(ts->subtest_states);
+		ts->subtest_states = NULL;
+		free(ts->log_buf);
+		ts->log_buf = NULL;
+	}
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -1428,7 +1665,6 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
-		struct test_result *result;
 
 		if (!test->should_run)
 			continue;
@@ -1444,34 +1680,7 @@ int main(int argc, char **argv)
 			continue;
 		}
 
-		stdio_hijack();
-
 		run_one_test(i);
-
-		stdio_restore();
-
-		fprintf(env.stdout, "#%d %s:%s\n",
-			test->test_num, test->test_name,
-			test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
-
-		result = &test_results[i];
-		result->error_cnt = test->error_cnt;
-		if (env.log_buf) {
-			result->log_buf = strdup(env.log_buf);
-			result->log_cnt = env.log_cnt;
-
-			free(env.log_buf);
-			env.log_buf = NULL;
-			env.log_cnt = 0;
-		}
-
-		if (test->error_cnt)
-			env.fail_cnt++;
-		else
-			env.succ_cnt++;
-
-		skip_account();
-		env.sub_succ_cnt += test->sub_succ_cnt;
 	}
 
 	if (env.get_test_cnt) {
@@ -1482,21 +1691,16 @@ int main(int argc, char **argv)
 	if (env.list_test_names)
 		goto out;
 
-	print_all_error_logs();
-
-	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
-		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+	calculate_summary_and_print_errors(&env);
 
 	close(env.saved_netns_fd);
 out:
 	if (!env.list_test_names && env.has_testmod)
 		unload_bpf_testmod();
-	free_str_set(&env.test_selector.blacklist);
-	free_str_set(&env.test_selector.whitelist);
-	free(env.test_selector.num_set);
-	free_str_set(&env.subtest_selector.blacklist);
-	free_str_set(&env.subtest_selector.whitelist);
-	free(env.subtest_selector.num_set);
+
+	free_test_selector(&env.test_selector);
+	free_test_selector(&env.subtest_selector);
+	free_test_states();
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 93c1ff705533..5fe1365c2bb1 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -25,6 +25,7 @@ typedef __u16 __sum16;
 #include <sys/wait.h>
 #include <sys/types.h>
 #include <sys/time.h>
+#include <sys/param.h>
 #include <fcntl.h>
 #include <pthread.h>
 #include <linux/bpf.h>
@@ -37,7 +38,6 @@ typedef __u16 __sum16;
 #include <bpf/bpf_endian.h>
 #include "trace_helpers.h"
 #include "testing_helpers.h"
-#include "flow_dissector_load.h"
 
 enum verbosity {
 	VERBOSE_NONE,
@@ -46,18 +46,52 @@ enum verbosity {
 	VERBOSE_SUPER,
 };
 
-struct str_set {
-	const char **strs;
+struct test_filter {
+	char *name;
+	char **subtests;
+	int subtest_cnt;
+};
+
+struct test_filter_set {
+	struct test_filter *tests;
 	int cnt;
 };
 
 struct test_selector {
-	struct str_set whitelist;
-	struct str_set blacklist;
+	struct test_filter_set whitelist;
+	struct test_filter_set blacklist;
 	bool *num_set;
 	int num_set_len;
 };
 
+struct subtest_state {
+	char *name;
+	size_t log_cnt;
+	char *log_buf;
+	int error_cnt;
+	bool skipped;
+	bool filtered;
+
+	FILE *stdout;
+};
+
+struct test_state {
+	bool tested;
+	bool force_log;
+
+	int error_cnt;
+	int skip_cnt;
+	int sub_succ_cnt;
+
+	struct subtest_state *subtest_states;
+	int subtest_num;
+
+	size_t log_cnt;
+	char *log_buf;
+
+	FILE *stdout;
+};
+
 struct test_env {
 	struct test_selector test_selector;
 	struct test_selector subtest_selector;
@@ -70,12 +104,12 @@ struct test_env {
 	bool get_test_cnt;
 	bool list_test_names;
 
-	struct prog_test_def *test; /* current running tests */
+	struct prog_test_def *test; /* current running test */
+	struct test_state *test_state; /* current running test state */
+	struct subtest_state *subtest_state; /* current running subtest state */
 
 	FILE *stdout;
 	FILE *stderr;
-	char *log_buf;
-	size_t log_cnt;
 	int nr_cpus;
 
 	int succ_cnt; /* successful tests */
@@ -92,39 +126,51 @@ struct test_env {
 };
 
 #define MAX_LOG_TRUNK_SIZE 8192
+#define MAX_SUBTEST_NAME 1024
 enum msg_type {
 	MSG_DO_TEST = 0,
 	MSG_TEST_DONE = 1,
 	MSG_TEST_LOG = 2,
+	MSG_SUBTEST_DONE = 3,
 	MSG_EXIT = 255,
 };
 struct msg {
 	enum msg_type type;
 	union {
 		struct {
-			int test_num;
+			int num;
 		} do_test;
 		struct {
-			int test_num;
+			int num;
 			int sub_succ_cnt;
 			int error_cnt;
 			int skip_cnt;
 			bool have_log;
+			int subtest_num;
 		} test_done;
 		struct {
 			char log_buf[MAX_LOG_TRUNK_SIZE + 1];
 			bool is_last;
 		} test_log;
+		struct {
+			int num;
+			char name[MAX_SUBTEST_NAME + 1];
+			int error_cnt;
+			bool skipped;
+			bool filtered;
+			bool have_log;
+		} subtest_done;
 	};
 };
 
 extern struct test_env env;
 
-extern void test__force_log();
-extern bool test__start_subtest(const char *name);
-extern void test__skip(void);
-extern void test__fail(void);
-extern int test__join_cgroup(const char *path);
+void test__force_log(void);
+bool test__start_subtest(const char *name);
+void test__end_subtest(void);
+void test__skip(void);
+void test__fail(void);
+int test__join_cgroup(const char *path);
 
 #define PRINT_FAIL(format...)                                                  \
 	({                                                                     \
@@ -267,6 +313,17 @@ extern int test__join_cgroup(const char *path);
 	___ok;								\
 })
 
+#define ASSERT_HAS_SUBSTR(str, substr, name) ({				\
+	static int duration = 0;					\
+	const char *___str = str;					\
+	const char *___substr = substr;					\
+	bool ___ok = strstr(___str, ___substr) != NULL;			\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: '%s' is not a substring of '%s'\n",	\
+	      (name), ___substr, ___str);				\
+	___ok;								\
+})
+
 #define ASSERT_OK(res, name) ({						\
 	static int duration = 0;					\
 	long long ___res = (res);					\
@@ -332,6 +389,8 @@ int trigger_module_test_write(int write_sz);
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
 #elif defined(__s390x__)
 #define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#elif defined(__aarch64__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
 #else
 #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
 #endif
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 4a64306728ab..3256de30f563 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -15,7 +15,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 #define CGROUP_PATH		"/skb_cgroup_test"
@@ -160,6 +159,9 @@ int main(int argc, char **argv)
 		exit(EXIT_FAILURE);
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	cgfd = cgroup_setup_and_join(CGROUP_PATH);
 	if (cgfd < 0)
 		goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index fe10f8134278..810c3740b2cc 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -14,7 +14,6 @@
 
 #include "cgroup_helpers.h"
 #include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 
 #define CG_PATH		"/foo"
@@ -493,7 +492,7 @@ static int run_test_case(int cgfd, const struct sock_test *test)
 			goto err;
 	}
 
-	if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+	if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) {
 		if (test->result == ATTACH_REJECT)
 			goto out;
 		else
@@ -541,6 +540,9 @@ int main(int argc, char **argv)
 	if (cgfd < 0)
 		goto err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index f3d5d7ac6505..458564fcfc82 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -19,7 +19,6 @@
 #include <bpf/libbpf.h>
 
 #include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 
 #ifndef ENOTSUPP
@@ -1418,6 +1417,9 @@ int main(int argc, char **argv)
 	if (cgfd < 0)
 		goto err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index dfb4f5c0fcb9..0fbaccdc8861 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -18,7 +18,6 @@
 #include <sched.h>
 
 #include <sys/time.h>
-#include <sys/resource.h>
 #include <sys/types.h>
 #include <sys/sendfile.h>
 
@@ -37,7 +36,6 @@
 #include <bpf/libbpf.h>
 
 #include "bpf_util.h"
-#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 int running;
@@ -2017,6 +2015,9 @@ int main(int argc, char **argv)
 		cg_created = 1;
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (test == SELFTESTS) {
 		err = test_selftest(cg_fd, &options);
 		goto out;
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 4f6cf833b522..57620e7c9048 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -14,7 +14,6 @@
 #include <bpf/libbpf.h>
 
 #include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
@@ -1561,7 +1560,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
 			goto err;
 	}
 
-	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+	if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) {
 		if (test->result == ATTACH_REJECT)
 			goto out;
 		else
@@ -1618,6 +1617,9 @@ int main(int argc, char **argv)
 	if (cgfd < 0)
 		goto err;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	if (run_tests(cgfd))
 		goto err;
 
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 0851c42ee31c..5546b05a0486 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -20,7 +20,6 @@
 #include <bpf/bpf.h>
 
 #include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
 #include "testing_helpers.h"
 
 static struct bpf_insn prog[BPF_MAXINSNS];
@@ -189,6 +188,9 @@ int main(void)
 	uint32_t tests = 0;
 	int i, fd_map;
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
 				sizeof(int), 1, &opts);
 	assert(fd_map > 0);
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index e7775d3bbe08..5c8ef062f760 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -15,7 +15,6 @@
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
 
-#include "bpf_rlimit.h"
 #include "cgroup_helpers.h"
 
 static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
@@ -235,6 +234,9 @@ int main(int argc, char **argv)
 		exit(1);
 	}
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
 	if (results < 0) {
 		log_err("Can't get map");
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 4c5114765b23..8284db8b0f13 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -19,7 +19,6 @@
 #include <linux/perf_event.h>
 #include <linux/err.h>
 
-#include "bpf_rlimit.h"
 #include "bpf_util.h"
 #include "cgroup_helpers.h"
 
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 2817d9948d59..e9ebc67d73f7 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -45,6 +45,7 @@
 # 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
 # 6) Forward the packet to the overlay tnl dev
 
+BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
 PING_ARG="-c 3 -w 10 -q"
 ret=0
 GREEN='\033[0;92m'
@@ -155,52 +156,6 @@ add_ip6erspan_tunnel()
 	ip link set dev $DEV up
 }
 
-add_vxlan_tunnel()
-{
-	# Set static ARP entry here because iptables set-mark works
-	# on L3 packet, as a result not applying to ARP packets,
-	# causing errors at get_tunnel_{key/opt}.
-
-	# at_ns0 namespace
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type $TYPE \
-		id 2 dstport 4789 gbp remote 172.16.1.200
-	ip netns exec at_ns0 \
-		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns0 \
-		ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS
-	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
-	# root namespace
-	ip link add dev $DEV type $TYPE external gbp dstport 4789
-	ip link set dev $DEV address 52:54:00:d9:02:00 up
-	ip addr add dev $DEV 10.1.1.200/24
-	ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV
-}
-
-add_ip6vxlan_tunnel()
-{
-	#ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
-	ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
-	ip netns exec at_ns0 ip link set dev veth0 up
-	#ip -4 addr del 172.16.1.200 dev veth1
-	ip -6 addr add dev veth1 ::22/96
-	ip link set dev veth1 up
-
-	# at_ns0 namespace
-	ip netns exec at_ns0 \
-		ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
-		local ::11 remote ::22
-	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns0 ip link set dev $DEV_NS up
-
-	# root namespace
-	ip link add dev $DEV type $TYPE external dstport 4789
-	ip addr add dev $DEV 10.1.1.200/24
-	ip link set dev $DEV up
-}
-
 add_geneve_tunnel()
 {
 	# at_ns0 namespace
@@ -403,58 +358,6 @@ test_ip6erspan()
         echo -e ${GREEN}"PASS: $TYPE"${NC}
 }
 
-test_vxlan()
-{
-	TYPE=vxlan
-	DEV_NS=vxlan00
-	DEV=vxlan11
-	ret=0
-
-	check $TYPE
-	config_device
-	add_vxlan_tunnel
-	attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
-	ping $PING_ARG 10.1.1.100
-	check_err $?
-	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
-	check_err $?
-	cleanup
-
-	if [ $ret -ne 0 ]; then
-                echo -e ${RED}"FAIL: $TYPE"${NC}
-                return 1
-        fi
-        echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6vxlan()
-{
-	TYPE=vxlan
-	DEV_NS=ip6vxlan00
-	DEV=ip6vxlan11
-	ret=0
-
-	check $TYPE
-	config_device
-	add_ip6vxlan_tunnel
-	ip link set dev veth1 mtu 1500
-	attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
-	# underlay
-	ping6 $PING_ARG ::11
-	# ip4 over ip6
-	ping $PING_ARG 10.1.1.100
-	check_err $?
-	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
-	check_err $?
-	cleanup
-
-	if [ $ret -ne 0 ]; then
-                echo -e ${RED}"FAIL: ip6$TYPE"${NC}
-                return 1
-        fi
-        echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
 test_geneve()
 {
 	TYPE=geneve
@@ -641,9 +544,11 @@ test_xfrm_tunnel()
 	config_device
 	> /sys/kernel/debug/tracing/trace
 	setup_xfrm_tunnel
+	mkdir -p ${BPF_PIN_TUNNEL_DIR}
+	bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}
 	tc qdisc add dev veth1 clsact
-	tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
-		sec xfrm_get_state
+	tc filter add dev veth1 proto ip ingress bpf da object-pinned \
+		${BPF_PIN_TUNNEL_DIR}/xfrm_get_state
 	ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
 	sleep 1
 	grep "reqid 1" /sys/kernel/debug/tracing/trace
@@ -666,13 +571,17 @@ attach_bpf()
 	DEV=$1
 	SET=$2
 	GET=$3
+	mkdir -p ${BPF_PIN_TUNNEL_DIR}
+	bpftool prog loadall ./test_tunnel_kern.o ${BPF_PIN_TUNNEL_DIR}/
 	tc qdisc add dev $DEV clsact
-	tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
-	tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+	tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
+	tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
 }
 
 cleanup()
 {
+        rm -rf ${BPF_PIN_TUNNEL_DIR}
+
 	ip netns delete at_ns0 2> /dev/null
 	ip link del veth1 2> /dev/null
 	ip link del ipip11 2> /dev/null
@@ -681,8 +590,6 @@ cleanup()
 	ip link del gretap11 2> /dev/null
 	ip link del ip6gre11 2> /dev/null
 	ip link del ip6gretap11 2> /dev/null
-	ip link del vxlan11 2> /dev/null
-	ip link del ip6vxlan11 2> /dev/null
 	ip link del geneve11 2> /dev/null
 	ip link del ip6geneve11 2> /dev/null
 	ip link del erspan11 2> /dev/null
@@ -714,7 +621,6 @@ enable_debug()
 {
 	echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
 	echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
-	echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
 	echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
 	echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
 }
@@ -750,14 +656,6 @@ bpf_tunnel_test()
 	test_ip6erspan v2
 	errors=$(( $errors + $? ))
 
-	echo "Testing VXLAN tunnel..."
-	test_vxlan
-	errors=$(( $errors + $? ))
-
-	echo "Testing IP6VXLAN tunnel..."
-	test_ip6vxlan
-	errors=$(( $errors + $? ))
-
 	echo "Testing GENEVE tunnel..."
 	test_geneve
 	errors=$(( $errors + $? ))
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index a2cd236c32eb..372579c9f45e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -53,7 +53,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	22
+#define MAX_NR_MAPS	23
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -101,6 +101,7 @@ struct bpf_test {
 	int fixup_map_reuseport_array[MAX_FIXUPS];
 	int fixup_map_ringbuf[MAX_FIXUPS];
 	int fixup_map_timer[MAX_FIXUPS];
+	int fixup_map_kptr[MAX_FIXUPS];
 	struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
 	/* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
 	 * Can be a tab-separated sequence of expected strings. An empty string
@@ -621,8 +622,15 @@ static int create_cgroup_storage(bool percpu)
  * struct timer {
  *   struct bpf_timer t;
  * };
+ * struct btf_ptr {
+ *   struct prog_test_ref_kfunc __kptr *ptr;
+ *   struct prog_test_ref_kfunc __kptr_ref *ptr;
+ *   struct prog_test_member __kptr_ref *ptr;
+ * };
  */
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
+				  "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_ref"
+				  "\0prog_test_member";
 static __u32 btf_raw_types[] = {
 	/* int */
 	BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),  /* [1] */
@@ -638,6 +646,22 @@ static __u32 btf_raw_types[] = {
 	/* struct timer */                              /* [5] */
 	BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
 	BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
+	/* struct prog_test_ref_kfunc */		/* [6] */
+	BTF_STRUCT_ENC(51, 0, 0),
+	BTF_STRUCT_ENC(89, 0, 0),			/* [7] */
+	/* type tag "kptr" */
+	BTF_TYPE_TAG_ENC(75, 6),			/* [8] */
+	/* type tag "kptr_ref" */
+	BTF_TYPE_TAG_ENC(80, 6),			/* [9] */
+	BTF_TYPE_TAG_ENC(80, 7),			/* [10] */
+	BTF_PTR_ENC(8),					/* [11] */
+	BTF_PTR_ENC(9),					/* [12] */
+	BTF_PTR_ENC(10),				/* [13] */
+	/* struct btf_ptr */				/* [14] */
+	BTF_STRUCT_ENC(43, 3, 24),
+	BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr *ptr; */
+	BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr_ref *ptr; */
+	BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr_ref *ptr; */
 };
 
 static int load_btf(void)
@@ -727,6 +751,25 @@ static int create_map_timer(void)
 	return fd;
 }
 
+static int create_map_kptr(void)
+{
+	LIBBPF_OPTS(bpf_map_create_opts, opts,
+		.btf_key_type_id = 1,		/* [1] int in btf_raw_types */
+		.btf_value_type_id = 14,	/* [14] struct btf_ptr in btf_raw_types */
+	);
+	int fd, btf_fd;
+
+	btf_fd = load_btf();
+	if (btf_fd < 0)
+		return -1;
+
+	opts.btf_fd = btf_fd; /* NOTE(review): btf_fd is not closed in this function - confirm intended */
+	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts); /* 24 == size given for [14] */
+	if (fd < 0)
+		printf("Failed to create map with btf_id pointer\n");
+	return fd; /* map fd on success, negative value from bpf_map_create() on failure */
+}
+
 static char bpf_vlog[UINT_MAX >> 8];
 
 static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
@@ -754,6 +797,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
 	int *fixup_map_ringbuf = test->fixup_map_ringbuf;
 	int *fixup_map_timer = test->fixup_map_timer;
+	int *fixup_map_kptr = test->fixup_map_kptr;
 	struct kfunc_btf_id_pair *fixup_kfunc_btf_id = test->fixup_kfunc_btf_id;
 
 	if (test->fill_helper) {
@@ -947,6 +991,13 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_timer++;
 		} while (*fixup_map_timer);
 	}
+	if (*fixup_map_kptr) {
+		map_fds[22] = create_map_kptr();
+		do {
+			prog[*fixup_map_kptr].imm = map_fds[22];
+			fixup_map_kptr++;
+		} while (*fixup_map_kptr);
+	}
 
 	/* Patch in kfunc BTF IDs */
 	if (fixup_kfunc_btf_id->kfunc) {
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
index 8d6918c3b4a2..70feda97cee5 100644
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ b/tools/testing/selftests/bpf/test_verifier_log.c
@@ -11,8 +11,6 @@
 
 #include <bpf/bpf.h>
 
-#include "bpf_rlimit.h"
-
 #define LOG_SIZE (1 << 20)
 
 #define err(str...)	printf("ERROR: " str)
@@ -141,6 +139,9 @@ int main(int argc, char **argv)
 
 	memset(log, 1, LOG_SIZE);
 
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
 	/* Test incorrect attr */
 	printf("Test log_level 0...\n");
 	test_log_bad(log, LOG_SIZE, 0);
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index cd7bf32e6a17..567500299231 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -43,7 +43,6 @@
 #   ** veth<xxxx> in root namespace
 #   ** veth<yyyy> in af_xdp<xxxx> namespace
 #   ** namespace af_xdp<xxxx>
-#   * create a spec file veth.spec that includes this run-time configuration
 #   *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
 #       conflict with any existing interface
 #   * tests the veth and xsk layers of the topology
@@ -77,7 +76,7 @@
 
 . xsk_prereqs.sh
 
-while getopts "cvD" flag
+while getopts "vD" flag
 do
 	case "${flag}" in
 		v) verbose=1;;
@@ -88,7 +87,7 @@ done
 TEST_NAME="PREREQUISITES"
 
 URANDOM=/dev/urandom
-[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit 1 1; }
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit $ksft_fail; }
 
 VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
 VETH0=ve${VETH0_POSTFIX}
@@ -98,6 +97,13 @@ NS0=root
 NS1=af_xdp${VETH1_POSTFIX}
 MTU=1500
 
+trap ctrl_c INT
+
+function ctrl_c() {
+        cleanup_exit ${VETH0} ${VETH1} ${NS1}
+	exit 1
+}
+
 setup_vethPairs() {
 	if [[ $verbose -eq 1 ]]; then
 	        echo "setting up ${VETH0}: namespace: ${NS0}"
@@ -110,6 +116,14 @@ setup_vethPairs() {
 	if [[ $verbose -eq 1 ]]; then
 	        echo "setting up ${VETH1}: namespace: ${NS1}"
 	fi
+
+	if [[ $busy_poll -eq 1 ]]; then
+	        echo 2 > /sys/class/net/${VETH0}/napi_defer_hard_irqs
+		echo 200000 > /sys/class/net/${VETH0}/gro_flush_timeout
+		echo 2 > /sys/class/net/${VETH1}/napi_defer_hard_irqs
+		echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout
+	fi
+
 	ip link set ${VETH1} netns ${NS1}
 	ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
 	ip link set ${VETH0} mtu ${MTU}
@@ -130,17 +144,12 @@ if [ $retval -ne 0 ]; then
 	exit $retval
 fi
 
-echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
-
-validate_veth_spec_file
-
 if [[ $verbose -eq 1 ]]; then
-        echo "Spec file created: ${SPECFILE}"
-	VERBOSE_ARG="-v"
+	ARGS+="-v "
 fi
 
 if [[ $dump_pkts -eq 1 ]]; then
-	DUMP_PKTS_ARG="-D"
+	ARGS+="-D "	# append: a plain assignment would discard the "-v " added above
 fi
 
 test_status $retval "${TEST_NAME}"
@@ -149,23 +158,31 @@ test_status $retval "${TEST_NAME}"
 
 statusList=()
 
-TEST_NAME="XSK KSELFTESTS"
+TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
 
 execxdpxceiver
 
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
+cleanup_exit ${VETH0} ${VETH1} ${NS1}
+TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
+busy_poll=1
+
+setup_vethPairs
+execxdpxceiver
 
 ## END TESTS
 
 cleanup_exit ${VETH0} ${VETH1} ${NS1}
 
-for _status in "${statusList[@]}"
+failures=0
+echo -e "\nSummary:"
+for i in "${!statusList[@]}"
 do
-	if [ $_status -ne 0 ]; then
-		test_exit $ksft_fail 0
+	if [ ${statusList[$i]} -ne 0 ]; then
+	        test_status ${statusList[$i]} ${nameList[$i]}
+		failures=1
 	fi
 done
 
-test_exit $ksft_pass 0
+if [ $failures -eq 0 ]; then
+        echo "All tests successful!"
+fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 795b6798ccee..9695318e8132 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -6,6 +6,7 @@
 #include <errno.h>
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
+#include "test_progs.h"
 #include "testing_helpers.h"
 
 int parse_num_list(const char *s, bool **num_set, int *num_set_len)
@@ -60,7 +61,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
 			set[i] = true;
 	}
 
-	if (!set)
+	if (!set || parsing_end)
 		return -EINVAL;
 
 	*num_set = set;
@@ -69,6 +70,110 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
 	return 0;
 }
 
+/*
+ * Parse a comma-separated list of "test[/subtest]" filters from @s and append
+ * the resulting entries to @set.
+ *
+ * Names are stored as glob patterns: verbatim if @is_glob_pattern is true,
+ * otherwise wrapped as "*name*". At most one subtest pattern is recorded per
+ * list item.
+ *
+ * Returns 0 on success, or -ENOMEM on allocation failure after releasing
+ * everything allocated by this call.
+ */
+int parse_test_list(const char *s,
+		    struct test_filter_set *set,
+		    bool is_glob_pattern)
+{
+	char *input, *state = NULL, *next;
+	struct test_filter *tmp, *tests = NULL;
+	int i, j, cnt = 0, init_cnt = 0;
+
+	input = strdup(s);
+	if (!input)
+		return -ENOMEM;
+
+	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+		char *subtest_str = strchr(next, '/');
+		char *pattern = NULL;
+		int glob_chars = 0;
+
+		tmp = realloc(tests, sizeof(*tests) * (cnt + 1));
+		if (!tmp)
+			goto err;
+		tests = tmp;
+
+		tests[cnt].name = NULL;
+		tests[cnt].subtest_cnt = 0;
+		tests[cnt].subtests = NULL;
+		/* tests[cnt] can now be torn down safely on the error path */
+		init_cnt = cnt + 1;
+
+		if (is_glob_pattern) {
+			pattern = "%s";
+		} else {
+			pattern = "*%s*";
+			glob_chars = 2;
+		}
+
+		if (subtest_str) {
+			char **tmp_subtests = NULL;
+			int subtest_cnt = tests[cnt].subtest_cnt;
+
+			*subtest_str = '\0';
+			subtest_str += 1;
+			tmp_subtests = realloc(tests[cnt].subtests,
+					       sizeof(*tmp_subtests) *
+					       (subtest_cnt + 1));
+			if (!tmp_subtests)
+				goto err;
+			tests[cnt].subtests = tmp_subtests;
+
+			tests[cnt].subtests[subtest_cnt] =
+				malloc(strlen(subtest_str) + glob_chars + 1);
+			if (!tests[cnt].subtests[subtest_cnt])
+				goto err;
+			sprintf(tests[cnt].subtests[subtest_cnt],
+				pattern,
+				subtest_str);
+
+			tests[cnt].subtest_cnt++;
+		}
+
+		tests[cnt].name = malloc(strlen(next) + glob_chars + 1);
+		if (!tests[cnt].name)
+			goto err;
+		sprintf(tests[cnt].name, pattern, next);
+
+		cnt++;
+	}
+
+	tmp = realloc(set->tests, sizeof(*tests) * (cnt + set->cnt));
+	if (!tmp)
+		goto err;
+
+	memcpy(tmp + set->cnt, tests, sizeof(*tests) * cnt);
+	set->tests = tmp;
+	set->cnt += cnt;
+
+	free(tests);
+	free(input);
+	return 0;
+
+err:
+	/* init_cnt also covers an entry that failed part-way through setup */
+	for (i = 0; i < init_cnt; i++) {
+		for (j = 0; j < tests[i].subtest_cnt; j++)
+			free(tests[i].subtests[j]);
+
+		free(tests[i].subtests);
+		free(tests[i].name);
+	}
+	free(tests);
+	free(input);
+	return -ENOMEM;
+}
+
 __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
 {
 	__u32 info_len = sizeof(*info);
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index f46ebc476ee8..6ec00bf79cb5 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -12,3 +12,11 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
 			  size_t insns_cnt, const char *license,
 			  __u32 kern_version, char *log_buf,
 			  size_t log_buf_sz);
+
+/*
+ * The function below is exported so that it can be tested by a prog_test test.
+ */
+struct test_filter_set;
+int parse_test_list(const char *s,
+		    struct test_filter_set *test_set,
+		    bool is_glob_pattern);
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 3d6217e3aff7..9c4be2cdb21a 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -25,15 +25,12 @@ static int ksym_cmp(const void *p1, const void *p2)
 
 int load_kallsyms(void)
 {
-	FILE *f = fopen("/proc/kallsyms", "r");
+	FILE *f;
 	char func[256], buf[256];
 	char symbol;
 	void *addr;
 	int i = 0;
 
-	if (!f)
-		return -ENOENT;
-
 	/*
 	 * This is called/used from multiplace places,
 	 * load symbols just once.
@@ -41,6 +38,10 @@ int load_kallsyms(void)
 	if (sym_cnt)
 		return 0;
 
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return -ENOENT;
+
 	while (fgets(buf, sizeof(buf), f)) {
 		if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
 			break;
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index db781052758d..e92644d0fa75 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -1,32 +1,85 @@
+#include <stdbool.h>
 #include <stdio.h>
 #include <unistd.h>
+#include <errno.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <stdlib.h>
+#include <signal.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
 
 #define BUF_SIZE 256
 
+/* defined in urandom_read_aux.c */
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+/* these are coming from urandom_read_lib{1,2}.c */
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+
+unsigned short urand_read_with_sema_semaphore SEC(".probes");
+
 static __attribute__((noinline))
 void urandom_read(int fd, int count)
 {
-       char buf[BUF_SIZE];
-       int i;
+	char buf[BUF_SIZE];
+	int i;
+
+	for (i = 0; i < count; ++i) {
+		read(fd, buf, BUF_SIZE);
+
+		/* trigger USDTs defined in executable itself */
+		urand_read_without_sema(i, count, BUF_SIZE);
+		STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE);
 
-       for (i = 0; i < count; ++i)
-               read(fd, buf, BUF_SIZE);
+		/* trigger USDTs defined in shared lib */
+		urandlib_read_without_sema(i, count, BUF_SIZE);
+		urandlib_read_with_sema(i, count, BUF_SIZE);
+	}
+}
+
+static volatile sig_atomic_t parent_ready; /* written only from the SIGPIPE handler */
+
+static void handle_sigpipe(int sig)
+{
+	parent_ready = 1; /* SIGPIPE: the reader on the other side of our stdout is gone */
+}
 
 int main(int argc, char *argv[])
 {
 	int fd = open("/dev/urandom", O_RDONLY);
 	int count = 4;
+	bool report_pid = false;
 
 	if (fd < 0)
 		return 1;
 
-	if (argc == 2)
+	if (argc >= 2)
 		count = atoi(argv[1]);
+	if (argc >= 3) {
+		report_pid = true;
+		/* install SIGPIPE handler to catch when parent closes their
+		 * end of the pipe (on the other side of our stdout)
+		 */
+		signal(SIGPIPE, handle_sigpipe);
+	}
+
+	/* report PID and wait for parent process to send us "signal" by
+	 * closing stdout
+	 */
+	if (report_pid) {
+		while (!parent_ready) {
+			fprintf(stdout, "%d\n", getpid());
+			fflush(stdout);
+		}
+		/* at this point stdout is closed, parent process knows our
+		 * PID and is ready to trace us
+		 */
+	}
 
 	urandom_read(fd, count);
 
diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c
new file mode 100644
index 000000000000..6132edcfea74
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_aux.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+	/* semaphore-less USDT: this file does not define _SDT_HAS_SEMAPHORES */
+	STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
new file mode 100644
index 000000000000..86186e24b740
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
+
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
+{
+	STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c
new file mode 100644
index 000000000000..9d401ad9838f
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib2.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+	STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2e03decb11b6..743ed34c1238 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -139,6 +139,26 @@
 	},
 },
 {
+	"calls: invalid kfunc call: don't match first member type when passed to release kfunc",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_memb_acquire", 1 },
+		{ "bpf_kfunc_call_memb1_release", 5 },
+	},
+},
+{
 	"calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
new file mode 100644
index 000000000000..6914904344c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -0,0 +1,469 @@
+/* Common tests */
+{
+	"map_kptr: BPF_ST imm != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "BPF_ST imm must be 0 when storing to kptr at off=0",
+},
+{
+	"map_kptr: size != bpf_size_to_bytes(BPF_DW)",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access size must be BPF_DW",
+},
+{
+	"map_kptr: map_value non-const var_off",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access cannot have variable offset",
+},
+{
+	"map_kptr: bpf_kptr_xchg non-const var_off",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_3),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset",
+},
+{
+	"map_kptr: unaligned boundary load/store",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr access misaligned expected=0 off=7",
+},
+{
+	"map_kptr: reject var_off != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+/* Tests for unreferenced PTR_TO_BTF_ID */
+{
+	"map_kptr: unref: reject btf_struct_ids_match == false",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test",
+},
+{
+	"map_kptr: unref: loaded pointer marked as untrusted",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'",
+},
+{
+	"map_kptr: unref: correct in kernel type size",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 32),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "access beyond struct prog_test_ref_kfunc at off 32 size 8",
+},
+{
+	"map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_",
+},
+{
+	"map_kptr: unref: no reference state created",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = ACCEPT,
+},
+{
+	"map_kptr: unref: bpf_kptr_xchg rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "off=0 kptr isn't referenced kptr",
+},
+{
+	"map_kptr: unref: bpf_kfunc_call_test_kptr_get rejected",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "arg#0 no referenced kptr at map value offset=0",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_kptr_get", 13 },
+	}
+},
+/* Tests for referenced PTR_TO_BTF_ID */
+{
+	"map_kptr: ref: loaded pointer marked as untrusted",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_",
+},
+{
+	"map_kptr: ref: reject off != 0",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member",
+},
+{
+	"map_kptr: ref: reference state created and released on xchg",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+	BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "Unreleased reference id=5 alloc_insn=20",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_kfunc_call_test_acquire", 15 },
+	}
+},
+{
+	"map_kptr: ref: reject STX",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "store to referenced kptr disallowed",
+},
+{
+	"map_kptr: ref: reject ST",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "store to referenced kptr disallowed",
+},
+{
+	"map_kptr: reject helper access to kptr",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_LD_MAP_FD(BPF_REG_6, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.fixup_map_kptr = { 1 },
+	.result = REJECT,
+	.errstr = "kptr cannot be accessed indirectly by helper",
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index fbd682520e47..57a83d763ec1 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -796,7 +796,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 86b24cad27a7..d11d0b28be41 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -417,7 +417,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	"bpf_sk_release(bpf_sk_fullsock(skb->sk))",
@@ -436,7 +436,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	"bpf_sk_release(bpf_tcp_sock(skb->sk))",
@@ -455,7 +455,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "reference has not been acquired before",
+	.errstr = "R1 must be referenced when passed to release function",
 },
 {
 	"sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index aaedbf4955c3..c03b3a75991f 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -10,7 +10,6 @@
 #include <net/if.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
 #include <sys/ioctl.h>
 #include <sys/types.h>
 #include <sys/socket.h>
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index c567856fd1bc..5b6f977870f8 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -12,7 +12,6 @@
 #include <string.h>
 #include <unistd.h>
 #include <libgen.h>
-#include <sys/resource.h>
 #include <net/if.h>
 #include <sys/types.h>
 #include <sys/socket.h>
@@ -89,7 +88,6 @@ int main(int argc, char **argv)
 {
 	__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
 	struct addrinfo *a, hints = { .ai_family = AF_INET };
-	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
 	__u16 count = XDPING_DEFAULT_COUNT;
 	struct pinginfo pinginfo = { 0 };
 	const char *optstr = "c:I:NsS";
@@ -167,10 +165,8 @@ int main(int argc, char **argv)
 		freeaddrinfo(a);
 	}
 
-	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-		perror("setrlimit(RLIMIT_MEMLOCK)");
-		return 1;
-	}
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index 5f8296d29e77..e5992a6b5e09 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -90,7 +90,8 @@
 #include <string.h>
 #include <stddef.h>
 #include <sys/mman.h>
-#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/time.h>
 #include <sys/types.h>
 #include <sys/queue.h>
 #include <time.h>
@@ -123,9 +124,17 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
 #define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
+#define busy_poll_string(test) ((test)->ifobj_tx->busy_poll ? "BUSY-POLL " : "")
 
-#define print_ksft_result(test)						\
-	(ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name))
+static void report_failure(struct test_spec *test)
+{
+	/* Report at most one FAIL per test: test->fail latches after the first report */
+	if (!test->fail) {
+		ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test),
+				      busy_poll_string(test), test->name);
+		test->fail = true;
+	}
+}
 
 static void memset32_htonl(void *dest, u32 val, u32 size)
 {
@@ -265,6 +274,26 @@ static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size
 	return 0;
 }
 
+static void enable_busy_poll(struct xsk_socket_info *xsk)
+{
+	int fd = xsk_socket__fd(xsk->xsk), val;
+
+	/* Opt this socket in to preferred busy polling */
+	val = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_PREFER_BUSY_POLL, &val, sizeof(val)) < 0)
+		exit_with_error(errno);
+
+	/* Busy-poll timeout value (microseconds per socket(7)) */
+	val = 20;
+	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL, &val, sizeof(val)) < 0)
+		exit_with_error(errno);
+
+	/* Busy-poll budget is set to one batch */
+	val = BATCH_SIZE;
+	if (setsockopt(fd, SOL_SOCKET, SO_BUSY_POLL_BUDGET, &val, sizeof(val)) < 0)
+		exit_with_error(errno);
+}
+
 static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
 				struct ifobject *ifobject, bool shared)
 {
@@ -288,8 +317,8 @@ static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_inf
 
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
-	{"queue", optional_argument, 0, 'q'},
-	{"dump-pkts", optional_argument, 0, 'D'},
+	{"busy-poll", no_argument, 0, 'b'},
+	{"dump-pkts", no_argument, 0, 'D'},
 	{"verbose", no_argument, 0, 'v'},
 	{0, 0, 0, 0}
 };
@@ -300,9 +329,9 @@ static void usage(const char *prog)
 		"  Usage: %s [OPTIONS]\n"
 		"  Options:\n"
 		"  -i, --interface      Use interface\n"
-		"  -q, --queue=n        Use queue n (default 0)\n"
 		"  -D, --dump-pkts      Dump packets L2 - L5\n"
-		"  -v, --verbose        Verbose output\n";
+		"  -v, --verbose        Verbose output\n"
+		"  -b, --busy-poll      Enable busy poll\n";
 
 	ksft_print_msg(str, prog);
 }
@@ -348,7 +377,7 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 	for (;;) {
 		char *sptr, *token;
 
-		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
+		c = getopt_long(argc, argv, "i:Dvb", long_options, &option_index);
 		if (c == -1)
 			break;
 
@@ -374,6 +403,10 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
 		case 'v':
 			opt_verbose = true;
 			break;
+		case 'b':
+			ifobj_tx->busy_poll = true;
+			ifobj_rx->busy_poll = true;
+			break;
 		default:
 			usage(basename(argv[0]));
 			ksft_exit_xfail();
@@ -391,8 +424,10 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 
 		ifobj->xsk = &ifobj->xsk_arr[0];
 		ifobj->use_poll = false;
-		ifobj->pacing_on = true;
+		ifobj->use_fill_ring = true;
+		ifobj->release_rx = true;
 		ifobj->pkt_stream = test->pkt_stream_default;
+		ifobj->validation_func = NULL;
 
 		if (i == 0) {
 			ifobj->rx_on = false;
@@ -417,6 +452,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 	test->current_step = 0;
 	test->total_steps = 1;
 	test->nb_sockets = 1;
+	test->fail = false;
 }
 
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
@@ -468,9 +504,10 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 	return &pkt_stream->pkts[pkt_nb];
 }
 
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream)
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
 {
 	while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+		(*pkts_sent)++;
 		if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
 			return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
 		pkt_stream->rx_pkt_nb++;
@@ -486,10 +523,16 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream)
 
 static void pkt_stream_restore_default(struct test_spec *test)
 {
-	if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) {
+	struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
+
+	if (tx_pkt_stream != test->pkt_stream_default) {
 		pkt_stream_delete(test->ifobj_tx->pkt_stream);
 		test->ifobj_tx->pkt_stream = test->pkt_stream_default;
 	}
+
+	if (test->ifobj_rx->pkt_stream != test->pkt_stream_default &&
+	    test->ifobj_rx->pkt_stream != tx_pkt_stream)
+		pkt_stream_delete(test->ifobj_rx->pkt_stream);
 	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
 }
 
@@ -511,6 +554,16 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
 	return pkt_stream;
 }
 
+static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
+{
+	pkt->addr = addr;
+	pkt->len = len;
+
+	/* Valid only if len fits the frame minus headrooms and a 2 * MIN_PKT_SIZE margin */
+	pkt->valid = len <= umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 -
+				umem->frame_headroom;
+}
+
 static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
 {
 	struct pkt_stream *pkt_stream;
@@ -522,14 +575,9 @@ static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb
 
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size;
-		pkt_stream->pkts[i].len = pkt_len;
+		pkt_set(umem, &pkt_stream->pkts[i], (i % umem->num_frames) * umem->frame_size,
+			pkt_len);
 		pkt_stream->pkts[i].payload = i;
-
-		if (pkt_len > umem->frame_size)
-			pkt_stream->pkts[i].valid = false;
-		else
-			pkt_stream->pkts[i].valid = true;
 	}
 
 	return pkt_stream;
@@ -557,15 +605,27 @@ static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int off
 	u32 i;
 
 	pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
-	for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) {
-		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset;
-		pkt_stream->pkts[i].len = pkt_len;
-	}
+	for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2)
+		pkt_set(umem, &pkt_stream->pkts[i],
+			(i % umem->num_frames) * umem->frame_size + offset, pkt_len);
 
 	test->ifobj_tx->pkt_stream = pkt_stream;
 	test->ifobj_rx->pkt_stream = pkt_stream;
 }
 
+static void pkt_stream_receive_half(struct test_spec *test)
+{
+	struct pkt_stream *tx_stream = test->ifobj_tx->pkt_stream, *rx_stream;
+	u32 i;
+
+	/* RX stream mirrors TX's count and length, with odd-indexed packets marked invalid */
+	rx_stream = pkt_stream_generate(test->ifobj_rx->umem, tx_stream->nb_pkts,
+					tx_stream->pkts[0].len);
+	for (i = 1; i < rx_stream->nb_pkts; i += 2)
+		rx_stream->pkts[i].valid = false;
+	test->ifobj_rx->pkt_stream = rx_stream;
+}
+
 static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
@@ -576,7 +636,7 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 
 	if (!pkt)
 		return NULL;
-	if (!pkt->valid || pkt->len < PKT_SIZE)
+	if (!pkt->valid || pkt->len < MIN_PKT_SIZE)
 		return pkt;
 
 	data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
@@ -663,8 +723,7 @@ static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt
 	if (offset == expected_offset)
 		return true;
 
-	ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset,
-			      offset);
+	ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
 	return false;
 }
 
@@ -674,19 +733,18 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 	struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
 
 	if (!pkt) {
-		ksft_test_result_fail("ERROR: [%s] too many packets received\n", __func__);
+		ksft_print_msg("[%s] too many packets received\n", __func__);
 		return false;
 	}
 
-	if (len < PKT_SIZE) {
-		/*Do not try to verify packets that are smaller than minimum size. */
+	if (len < MIN_PKT_SIZE || pkt->len < MIN_PKT_SIZE) {
+		/* Do not try to verify packets that are smaller than minimum size. */
 		return true;
 	}
 
 	if (pkt->len != len) {
-		ksft_test_result_fail
-			("ERROR: [%s] expected length [%d], got length [%d]\n",
-			 __func__, pkt->len, len);
+		ksft_print_msg("[%s] expected length [%d], got length [%d]\n",
+			       __func__, pkt->len, len);
 		return false;
 	}
 
@@ -697,9 +755,8 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
 			pkt_dump(data, PKT_SIZE);
 
 		if (pkt->payload != seqnum) {
-			ksft_test_result_fail
-				("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
-					__func__, pkt->payload, seqnum);
+			ksft_print_msg("[%s] expected seqnum [%d], got seqnum [%d]\n",
+				       __func__, pkt->payload, seqnum);
 			return false;
 		}
 	} else {
@@ -717,12 +774,25 @@ static void kick_tx(struct xsk_socket_info *xsk)
 	int ret;
 
 	ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
-	if (ret >= 0 || errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN)
+	if (ret >= 0)
 		return;
+	if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+		usleep(100);
+		return;
+	}
 	exit_with_error(errno);
 }
 
-static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+static void kick_rx(struct xsk_socket_info *xsk)
+{
+	int fd = xsk_socket__fd(xsk->xsk);
+
+	/* Zero-length, non-blocking receive on the socket; any error aborts the run */
+	if (recvfrom(fd, NULL, 0, MSG_DONTWAIT, NULL, NULL) < 0)
+		exit_with_error(errno);
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 {
 	unsigned int rcvd;
 	u32 idx;
@@ -735,26 +805,45 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 		if (rcvd > xsk->outstanding_tx) {
 			u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
 
-			ksft_test_result_fail("ERROR: [%s] Too many packets completed\n",
-					      __func__);
+			ksft_print_msg("[%s] Too many packets completed\n", __func__);
 			ksft_print_msg("Last completion address: %llx\n", addr);
-			return;
+			return TEST_FAILURE;
 		}
 
 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
 	}
+
+	return TEST_PASS;
 }
 
-static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
-			 struct pollfd *fds)
+static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 {
-	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
+	struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
+	u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
+	struct pkt_stream *pkt_stream = ifobj->pkt_stream;
+	struct xsk_socket_info *xsk = ifobj->xsk;
 	struct xsk_umem_info *umem = xsk->umem;
-	u32 idx_rx = 0, idx_fq = 0, rcvd, i;
+	struct pkt *pkt;
 	int ret;
 
+	ret = gettimeofday(&tv_now, NULL);
+	if (ret)
+		exit_with_error(errno);
+	timeradd(&tv_now, &tv_timeout, &tv_end);
+
+	pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
 	while (pkt) {
+		ret = gettimeofday(&tv_now, NULL);
+		if (ret)
+			exit_with_error(errno);
+		if (timercmp(&tv_now, &tv_end, >)) {
+			ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+			return TEST_FAILURE;
+		}
+
+		kick_rx(xsk);
+
 		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
 		if (!rcvd) {
 			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
@@ -765,54 +854,53 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
 			continue;
 		}
 
-		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
-		while (ret != rcvd) {
-			if (ret < 0)
-				exit_with_error(-ret);
-			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-				ret = poll(fds, 1, POLL_TMOUT);
+		if (ifobj->use_fill_ring) {
+			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+			while (ret != rcvd) {
 				if (ret < 0)
 					exit_with_error(-ret);
+				if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+					ret = poll(fds, 1, POLL_TMOUT);
+					if (ret < 0)
+						exit_with_error(-ret);
+				}
+				ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 			}
-			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 		}
 
 		for (i = 0; i < rcvd; i++) {
 			const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
 			u64 addr = desc->addr, orig;
 
-			if (!pkt) {
-				ksft_test_result_fail("ERROR: [%s] Received too many packets.\n",
-						      __func__);
-				ksft_print_msg("Last packet has addr: %llx len: %u\n",
-					       addr, desc->len);
-				return;
-			}
-
 			orig = xsk_umem__extract_addr(addr);
 			addr = xsk_umem__add_offset_to_addr(addr);
 
-			if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len))
-				return;
-			if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr))
-				return;
+			if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len) ||
+			    !is_offset_correct(umem, pkt_stream, addr, pkt->addr))
+				return TEST_FAILURE;
 
-			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-			pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
+			if (ifobj->use_fill_ring)
+				*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+			pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
 		}
 
-		xsk_ring_prod__submit(&umem->fq, rcvd);
-		xsk_ring_cons__release(&xsk->rx, rcvd);
+		if (ifobj->use_fill_ring)
+			xsk_ring_prod__submit(&umem->fq, rcvd);
+		if (ifobj->release_rx)
+			xsk_ring_cons__release(&xsk->rx, rcvd);
 
 		pthread_mutex_lock(&pacing_mutex);
-		pkts_in_flight -= rcvd;
+		pkts_in_flight -= pkts_sent;
 		if (pkts_in_flight < umem->num_frames)
 			pthread_cond_signal(&pacing_cond);
 		pthread_mutex_unlock(&pacing_mutex);
+		pkts_sent = 0;
 	}
+
+	return TEST_PASS;
 }
 
-static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
+static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
 {
 	struct xsk_socket_info *xsk = ifobject->xsk;
 	u32 i, idx, valid_pkts = 0;
@@ -822,21 +910,22 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 
 	for (i = 0; i < BATCH_SIZE; i++) {
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-		struct pkt *pkt = pkt_generate(ifobject, pkt_nb);
+		struct pkt *pkt = pkt_generate(ifobject, *pkt_nb);
 
 		if (!pkt)
 			break;
 
 		tx_desc->addr = pkt->addr;
 		tx_desc->len = pkt->len;
-		pkt_nb++;
+		(*pkt_nb)++;
 		if (pkt->valid)
 			valid_pkts++;
 	}
 
 	pthread_mutex_lock(&pacing_mutex);
 	pkts_in_flight += valid_pkts;
-	if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) {
+	/* pkts_in_flight might be negative if many invalid packets are sent */
+	if (pkts_in_flight >= (int)(ifobject->umem->num_frames - BATCH_SIZE)) {
 		kick_tx(xsk);
 		pthread_cond_wait(&pacing_cond, &pacing_mutex);
 	}
@@ -844,10 +933,11 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 
 	xsk_ring_prod__submit(&xsk->tx, i);
 	xsk->outstanding_tx += valid_pkts;
-	complete_pkts(xsk, i);
+	if (complete_pkts(xsk, i))
+		return TEST_FAILURE;
 
 	usleep(10);
-	return i;
+	return TEST_PASS;
 }
 
 static void wait_for_tx_completion(struct xsk_socket_info *xsk)
@@ -856,7 +946,7 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 		complete_pkts(xsk, BATCH_SIZE);
 }
 
-static void send_pkts(struct ifobject *ifobject)
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
 {
 	struct pollfd fds = { };
 	u32 pkt_cnt = 0;
@@ -865,6 +955,8 @@ static void send_pkts(struct ifobject *ifobject)
 	fds.events = POLLOUT;
 
 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
+		int err;
+
 		if (ifobject->use_poll) {
 			int ret;
 
@@ -876,58 +968,95 @@ static void send_pkts(struct ifobject *ifobject)
 				continue;
 		}
 
-		pkt_cnt += __send_pkts(ifobject, pkt_cnt);
+		err = __send_pkts(ifobject, &pkt_cnt);
+		if (err || test->fail)
+			return TEST_FAILURE;
 	}
 
 	wait_for_tx_completion(ifobject->xsk);
+	return TEST_PASS;
 }
 
-static bool rx_stats_are_valid(struct ifobject *ifobject)
+/* Read the XDP_STATISTICS counters of @xsk into @stats; returns TEST_PASS or TEST_FAILURE. */
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+	int fd = xsk_socket__fd(xsk), err;
+	socklen_t optlen = sizeof(*stats), expected_len = sizeof(struct xdp_statistics);
+
+	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+	if (err) {
+		/* getsockopt() fails with -1 and the reason in errno, so report errno */
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, errno, strerror(errno));
+		return TEST_FAILURE;
+	}
+
+	if (optlen != expected_len) {
+		ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+			       __func__, expected_len, optlen);
+		return TEST_FAILURE;
+	}
+
+	return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
 {
-	u32 xsk_stat = 0, expected_stat = ifobject->pkt_stream->nb_pkts;
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
-	int fd = xsk_socket__fd(xsk);
 	struct xdp_statistics stats;
-	socklen_t optlen;
 	int err;
 
-	optlen = sizeof(stats);
-	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
-	if (err) {
-		ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-				      __func__, -err, strerror(-err));
-		return true;
-	}
+	kick_rx(ifobject->xsk);
 
-	if (optlen == sizeof(struct xdp_statistics)) {
-		switch (stat_test_type) {
-		case STAT_TEST_RX_DROPPED:
-			xsk_stat = stats.rx_dropped;
-			break;
-		case STAT_TEST_TX_INVALID:
-			return true;
-		case STAT_TEST_RX_FULL:
-			xsk_stat = stats.rx_ring_full;
-			if (ifobject->umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
-				expected_stat = ifobject->umem->num_frames - RX_FULL_RXQSIZE;
-			else
-				expected_stat = XSK_RING_PROD__DEFAULT_NUM_DESCS - RX_FULL_RXQSIZE;
-			break;
-		case STAT_TEST_RX_FILL_EMPTY:
-			xsk_stat = stats.rx_fill_ring_empty_descs;
-			break;
-		default:
-			break;
-		}
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
 
-		if (xsk_stat == expected_stat)
-			return true;
-	}
+	if (stats.rx_dropped == ifobject->pkt_stream->nb_pkts / 2)
+		return TEST_PASS;
 
-	return false;
+	return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	int err;
+
+	usleep(1000);
+	kick_rx(ifobject->xsk);
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	if (stats.rx_ring_full)
+		return TEST_PASS;
+
+	return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+	struct xsk_socket *xsk = ifobject->xsk->xsk;
+	struct xdp_statistics stats;
+	int err;
+
+	usleep(1000);
+	kick_rx(ifobject->xsk);
+
+	err = get_xsk_stats(xsk, &stats);
+	if (err)
+		return TEST_FAILURE;
+
+	if (stats.rx_fill_ring_empty_descs)
+		return TEST_PASS;
+
+	return TEST_FAILURE;
 }
 
-static void tx_stats_validate(struct ifobject *ifobject)
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
 {
 	struct xsk_socket *xsk = ifobject->xsk->xsk;
 	int fd = xsk_socket__fd(xsk);
@@ -938,16 +1067,18 @@ static void tx_stats_validate(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
-				      __func__, -err, strerror(-err));
-		return;
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, errno, strerror(errno)); /* cause is in errno */
+		return TEST_FAILURE;
 	}
 
-	if (stats.tx_invalid_descs == ifobject->pkt_stream->nb_pkts)
-		return;
+	if (stats.tx_invalid_descs != ifobject->pkt_stream->nb_pkts / 2) {
+		ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
+			       __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts / 2);
+		return TEST_FAILURE;
+	}
 
-	ksft_test_result_fail("ERROR: [%s] tx_invalid_descs incorrect. Got [%u] expected [%u]\n",
-			      __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
+	return TEST_PASS;
 }
 
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
@@ -985,6 +1116,9 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 				exit_with_error(-ret);
 			usleep(USLEEP_MAX);
 		}
+
+		if (ifobject->busy_poll)
+			enable_busy_poll(&ifobject->xsk_arr[i]);
 	}
 
 	ifobject->xsk = &ifobject->xsk_arr[0];
@@ -1017,18 +1151,21 @@ static void *worker_testapp_validate_tx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_tx;
+	int err;
 
 	if (test->current_step == 1)
 		thread_common_ops(test, ifobject);
 
 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
 		      ifobject->ifname);
-	send_pkts(ifobject);
+	err = send_pkts(test, ifobject);
 
-	if (stat_test_type == STAT_TEST_TX_INVALID)
-		tx_stats_validate(ifobject);
+	if (!err && ifobject->validation_func)
+		err = ifobject->validation_func(ifobject);
+	if (err)
+		report_failure(test);
 
-	if (test->total_steps == test->current_step)
+	if (test->total_steps == test->current_step || err)
 		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
@@ -1069,6 +1206,7 @@ static void *worker_testapp_validate_rx(void *arg)
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
 	struct pollfd fds = { };
+	int err;
 
 	if (test->current_step == 1)
 		thread_common_ops(test, ifobject);
@@ -1080,18 +1218,23 @@ static void *worker_testapp_validate_rx(void *arg)
 
 	pthread_barrier_wait(&barr);
 
-	if (test_type == TEST_TYPE_STATS)
-		while (!rx_stats_are_valid(ifobject))
-			continue;
-	else
-		receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds);
+	err = receive_pkts(ifobject, &fds);
+
+	if (!err && ifobject->validation_func)
+		err = ifobject->validation_func(ifobject);
+	if (err) {
+		report_failure(test);
+		pthread_mutex_lock(&pacing_mutex);
+		pthread_cond_signal(&pacing_cond);
+		pthread_mutex_unlock(&pacing_mutex);
+	}
 
-	if (test->total_steps == test->current_step)
+	if (test->total_steps == test->current_step || err)
 		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
-static void testapp_validate_traffic(struct test_spec *test)
+static int testapp_validate_traffic(struct test_spec *test)
 {
 	struct ifobject *ifobj_tx = test->ifobj_tx;
 	struct ifobject *ifobj_rx = test->ifobj_rx;
@@ -1116,6 +1259,8 @@ static void testapp_validate_traffic(struct test_spec *test)
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
+
+	return !!test->fail;
 }
 
 static void testapp_teardown(struct test_spec *test)
@@ -1124,7 +1269,8 @@ static void testapp_teardown(struct test_spec *test)
 
 	test_spec_set_name(test, "TEARDOWN");
 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-		testapp_validate_traffic(test);
+		if (testapp_validate_traffic(test))
+			return;
 		test_spec_reset(test);
 	}
 }
@@ -1147,7 +1293,8 @@ static void testapp_bidi(struct test_spec *test)
 	test->ifobj_tx->rx_on = true;
 	test->ifobj_rx->tx_on = true;
 	test->total_steps = 2;
-	testapp_validate_traffic(test);
+	if (testapp_validate_traffic(test))
+		return;
 
 	print_verbose("Switching Tx/Rx vectors\n");
 	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
@@ -1175,7 +1322,8 @@ static void testapp_bpf_res(struct test_spec *test)
 	test_spec_set_name(test, "BPF_RES");
 	test->total_steps = 2;
 	test->nb_sockets = 2;
-	testapp_validate_traffic(test);
+	if (testapp_validate_traffic(test))
+		return;
 
 	swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
 	testapp_validate_traffic(test);
@@ -1188,53 +1336,58 @@ static void testapp_headroom(struct test_spec *test)
 	testapp_validate_traffic(test);
 }
 
-static void testapp_stats(struct test_spec *test)
+static void testapp_stats_rx_dropped(struct test_spec *test)
 {
-	int i;
+	test_spec_set_name(test, "STAT_RX_DROPPED");
+	test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+		XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+	pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
+	pkt_stream_receive_half(test);
+	test->ifobj_rx->validation_func = validate_rx_dropped;
+	testapp_validate_traffic(test);
+}
 
-	for (i = 0; i < STAT_TEST_TYPE_MAX; i++) {
-		test_spec_reset(test);
-		stat_test_type = i;
-		/* No or few packets will be received so cannot pace packets */
-		test->ifobj_tx->pacing_on = false;
-
-		switch (stat_test_type) {
-		case STAT_TEST_RX_DROPPED:
-			test_spec_set_name(test, "STAT_RX_DROPPED");
-			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
-				XDP_PACKET_HEADROOM - 1;
-			testapp_validate_traffic(test);
-			break;
-		case STAT_TEST_RX_FULL:
-			test_spec_set_name(test, "STAT_RX_FULL");
-			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
-			testapp_validate_traffic(test);
-			break;
-		case STAT_TEST_TX_INVALID:
-			test_spec_set_name(test, "STAT_TX_INVALID");
-			pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
-			testapp_validate_traffic(test);
+static void testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+	test_spec_set_name(test, "STAT_TX_INVALID");
+	pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
+	test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+	testapp_validate_traffic(test);
 
-			pkt_stream_restore_default(test);
-			break;
-		case STAT_TEST_RX_FILL_EMPTY:
-			test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
-			test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0,
-									 MIN_PKT_SIZE);
-			if (!test->ifobj_rx->pkt_stream)
-				exit_with_error(ENOMEM);
-			test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
-			testapp_validate_traffic(test);
-
-			pkt_stream_restore_default(test);
-			break;
-		default:
-			break;
-		}
-	}
+	pkt_stream_restore_default(test);
+}
+
+static void testapp_stats_rx_full(struct test_spec *test)
+{
+	test_spec_set_name(test, "STAT_RX_FULL");
+	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
+							 DEFAULT_UMEM_BUFFERS, PKT_SIZE);
+	if (!test->ifobj_rx->pkt_stream)
+		exit_with_error(ENOMEM);
+
+	test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+	test->ifobj_rx->release_rx = false;
+	test->ifobj_rx->validation_func = validate_rx_full;
+	testapp_validate_traffic(test);
 
-	/* To only see the whole stat set being completed unless an individual test fails. */
-	test_spec_set_name(test, "STATS");
+	pkt_stream_restore_default(test);
+}
+
+static void testapp_stats_fill_empty(struct test_spec *test)
+{
+	test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+	pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, PKT_SIZE);
+	test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem,
+							 DEFAULT_UMEM_BUFFERS, PKT_SIZE);
+	if (!test->ifobj_rx->pkt_stream)
+		exit_with_error(ENOMEM);
+
+	test->ifobj_rx->use_fill_ring = false;
+	test->ifobj_rx->validation_func = validate_fill_empty;
+	testapp_validate_traffic(test);
+
+	pkt_stream_restore_default(test);
 }
 
 /* Simple test */
@@ -1283,10 +1436,10 @@ static void testapp_single_pkt(struct test_spec *test)
 static void testapp_invalid_desc(struct test_spec *test)
 {
 	struct pkt pkts[] = {
-		/* Zero packet length at address zero allowed */
-		{0, 0, 0, true},
-		/* Zero packet length allowed */
-		{0x1000, 0, 0, true},
+		/* Zero packet address allowed */
+		{0, PKT_SIZE, 0, true},
+		/* Allowed packet */
+		{0x1000, PKT_SIZE, 0, true},
 		/* Straddling the start of umem */
 		{-2, PKT_SIZE, 0, false},
 		/* Packet too large */
@@ -1339,14 +1492,18 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *
 
 static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
 {
-	test_type = type;
-
-	/* reset defaults after potential previous test */
-	stat_test_type = -1;
-
-	switch (test_type) {
-	case TEST_TYPE_STATS:
-		testapp_stats(test);
+	switch (type) {
+	case TEST_TYPE_STATS_RX_DROPPED:
+		testapp_stats_rx_dropped(test);
+		break;
+	case TEST_TYPE_STATS_TX_INVALID_DESCS:
+		testapp_stats_tx_invalid_descs(test);
+		break;
+	case TEST_TYPE_STATS_RX_FULL:
+		testapp_stats_rx_full(test);
+		break;
+	case TEST_TYPE_STATS_FILL_EMPTY:
+		testapp_stats_fill_empty(test);
 		break;
 	case TEST_TYPE_TEARDOWN:
 		testapp_teardown(test);
@@ -1369,7 +1526,7 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
 		test->ifobj_tx->umem->frame_size = 2048;
 		test->ifobj_rx->umem->frame_size = 2048;
-		pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+		pkt_stream_replace(test, DEFAULT_PKT_CNT, PKT_SIZE);
 		testapp_validate_traffic(test);
 
 		pkt_stream_restore_default(test);
@@ -1411,7 +1568,9 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 		break;
 	}
 
-	print_ksft_result(test);
+	if (!test->fail)
+		ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
+				      test->name);
 }
 
 static struct ifobject *ifobject_create(void)
@@ -1448,14 +1607,13 @@ static void ifobject_delete(struct ifobject *ifobj)
 
 int main(int argc, char **argv)
 {
-	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
 	struct pkt_stream *pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
+	u32 i, j, failed_tests = 0;
 	struct test_spec test;
-	u32 i, j;
 
-	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
-		exit_with_error(errno);
+	/* Use libbpf 1.0 API mode */
+	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
 	ifobj_tx = ifobject_create();
 	if (!ifobj_tx)
@@ -1491,12 +1649,17 @@ int main(int argc, char **argv)
 			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
 			run_pkt_test(&test, i, j);
 			usleep(USLEEP_MAX);
+
+			if (test.fail)
+				failed_tests++;
 		}
 
 	pkt_stream_delete(pkt_stream_default);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
-	ksft_exit_pass();
-	return 0;
+	if (failed_tests)
+		ksft_exit_fail();
+	else
+		ksft_exit_pass();
 }
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 62a3e6388632..8f672b0fe0e1 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -17,6 +17,16 @@
 #define PF_XDP AF_XDP
 #endif
 
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
@@ -25,9 +35,10 @@
 #define MAX_TEARDOWN_ITER 10
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
-#define MIN_PKT_SIZE 64
+#define MIN_ETH_PKT_SIZE 64
 #define ETH_FCS_SIZE 4
-#define PKT_SIZE (MIN_PKT_SIZE - ETH_FCS_SIZE)
+#define MIN_PKT_SIZE (MIN_ETH_PKT_SIZE - ETH_FCS_SIZE)
+#define PKT_SIZE (MIN_PKT_SIZE)
 #define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr))
 #define IP_PKT_VER 0x4
 #define IP_PKT_TOS 0x9
@@ -37,6 +48,7 @@
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
+#define RECV_TMOUT 3
 #define DEFAULT_PKT_CNT (4 * 1024)
 #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
 #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
@@ -64,24 +76,16 @@ enum test_type {
 	TEST_TYPE_HEADROOM,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
-	TEST_TYPE_STATS,
+	TEST_TYPE_STATS_RX_DROPPED,
+	TEST_TYPE_STATS_TX_INVALID_DESCS,
+	TEST_TYPE_STATS_RX_FULL,
+	TEST_TYPE_STATS_FILL_EMPTY,
 	TEST_TYPE_BPF_RES,
 	TEST_TYPE_MAX
 };
 
-enum stat_test_type {
-	STAT_TEST_RX_DROPPED,
-	STAT_TEST_TX_INVALID,
-	STAT_TEST_RX_FULL,
-	STAT_TEST_RX_FILL_EMPTY,
-	STAT_TEST_TYPE_MAX
-};
-
 static bool opt_pkt_dump;
-static int test_type;
-
 static bool opt_verbose;
-static int stat_test_type;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
@@ -117,6 +121,8 @@ struct pkt_stream {
 	bool use_addr_for_fill;
 };
 
+struct ifobject;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
 typedef void *(*thread_func_t)(void *arg);
 
 struct ifobject {
@@ -126,6 +132,7 @@ struct ifobject {
 	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
 	thread_func_t func_ptr;
+	validation_func_t validation_func;
 	struct pkt_stream *pkt_stream;
 	int ns_fd;
 	int xsk_map_fd;
@@ -138,7 +145,9 @@ struct ifobject {
 	bool tx_on;
 	bool rx_on;
 	bool use_poll;
-	bool pacing_on;
+	bool busy_poll;
+	bool use_fill_ring;
+	bool release_rx;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];
 };
@@ -150,6 +159,7 @@ struct test_spec {
 	u16 total_steps;
 	u16 current_step;
 	u16 nb_sockets;
+	bool fail;
 	char name[MAX_TEST_NAME_SIZE];
 };
 
@@ -157,6 +167,6 @@ pthread_barrier_t barr;
 pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
 pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
 
-u32 pkts_in_flight;
+int pkts_in_flight;
 
 #endif				/* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index bf29d2549bee..684e813803ec 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -8,7 +8,6 @@ ksft_xfail=2
 ksft_xpass=3
 ksft_skip=4
 
-SPECFILE=veth.spec
 XSKOBJ=xdpxceiver
 
 validate_root_exec()
@@ -16,7 +15,7 @@ validate_root_exec()
 	msg="skip all tests:"
 	if [ $UID != 0 ]; then
 		echo $msg must be run as root >&2
-		test_exit $ksft_fail 2
+		test_exit $ksft_fail
 	else
 		return $ksft_pass
 	fi
@@ -27,39 +26,31 @@ validate_veth_support()
 	msg="skip all tests:"
 	if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
 		echo $msg veth kernel support not available >&2
-		test_exit $ksft_skip 1
+		test_exit $ksft_skip
 	else
 		ip link del $1
 		return $ksft_pass
 	fi
 }
 
-validate_veth_spec_file()
-{
-	if [ ! -f ${SPECFILE} ]; then
-		test_exit $ksft_skip 1
-	fi
-}
-
 test_status()
 {
 	statusval=$1
-	if [ $statusval -eq 2 ]; then
-		echo -e "$2: [ FAIL ]"
-	elif [ $statusval -eq 1 ]; then
-		echo -e "$2: [ SKIPPED ]"
-	elif [ $statusval -eq 0 ]; then
-		echo -e "$2: [ PASS ]"
+	if [ $statusval -eq $ksft_fail ]; then
+		echo "$2: [ FAIL ]"
+	elif [ $statusval -eq $ksft_skip ]; then
+		echo "$2: [ SKIPPED ]"
+	elif [ $statusval -eq $ksft_pass ]; then
+		echo "$2: [ PASS ]"
 	fi
 }
 
 test_exit()
 {
-	retval=$1
-	if [ $2 -ne 0 ]; then
-		test_status $2 $(basename $0)
+	if [ $1 -ne 0 ]; then
+		test_status $1 $(basename $0)
 	fi
-	exit $retval
+	exit $1 # propagate the kselftest code (e.g. $ksft_skip) rather than a blanket failure
 }
 
 clear_configs()
@@ -74,9 +65,6 @@ clear_configs()
 	#veth node inside NS won't get removed so we explicitly remove it
 	[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
 		{ ip link del $1; }
-	if [ -f ${SPECFILE} ]; then
-		rm -f ${SPECFILE}
-	fi
 }
 
 cleanup_exit()
@@ -86,10 +74,19 @@ cleanup_exit()
 
 validate_ip_utility()
 {
-	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
+	[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
 }
 
 execxdpxceiver()
 {
-	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
+	if [[ $busy_poll -eq 1 ]]; then
+		ARGS+="-b "
+	fi
+
+	./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${ARGS}
+
+	retval=$?
+	test_status $retval "${TEST_NAME}"
+	statusList+=($retval)
+	nameList+=("${TEST_NAME}") # quoted: exactly one entry per run, in step with statusList
 }
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
new file mode 120000
index 000000000000..f5eb940c4c7c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_locked_port.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
new file mode 120000
index 000000000000..76492da525f7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_mdb.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
new file mode 120000
index 000000000000..81a7e0df0474
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_mld.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
new file mode 120000
index 000000000000..9831ed74376a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_aware.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
new file mode 120000
index 000000000000..7f3c3f0bf719
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_mcast.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
new file mode 120000
index 000000000000..bf1a57e6bde1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -0,0 +1 @@
+../../../net/forwarding/bridge_vlan_unaware.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config
new file mode 100644
index 000000000000..7adc1396fae0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config
@@ -0,0 +1,2 @@
+NETIF_CREATE=no
+STABLE_MAC_ADDRS=yes
diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh
new file mode 120000
index 000000000000..39c96828c5ef
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/lib.sh
@@ -0,0 +1 @@
+../../../net/forwarding/lib.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
new file mode 120000
index 000000000000..c08166f84501
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -0,0 +1 @@
+../../../net/forwarding/local_termination.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
new file mode 120000
index 000000000000..b9757466bc97
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -0,0 +1 @@
+../../../net/forwarding/no_forwarding.sh
+\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
new file mode 100755
index 000000000000..08a922d8b86a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -0,0 +1,280 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In addition to the common variables, user might use:
+# LC_SLOT - If not set, all probed line cards are going to be tested,
+#	    with an exception of the "activation_16x100G_test".
+#	    If set, only the selected line card is going to be used
+#	    for tests, including "activation_16x100G_test".
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	unprovision_test
+	provision_test
+	activation_16x100G_test
+"
+
+NUM_NETIFS=0
+
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+until_lc_state_is()
+{
+	local state=$1; shift
+	local current=$("$@")
+
+	echo "$current"
+	[ "$current" == "$state" ]
+}
+
+until_lc_state_is_not()
+{
+	! until_lc_state_is "$@"
+}
+
+lc_state_get()
+{
+	local lc=$1
+
+	devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state"
+}
+
+lc_wait_until_state_changes()
+{
+	local lc=$1
+	local state=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc"
+}
+
+lc_wait_until_state_becomes()
+{
+	local lc=$1
+	local state=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc"
+}
+
+until_lc_port_count_is()
+{
+	local port_count=$1; shift
+	local current=$("$@")
+
+	echo "$current"
+	[ $current == $port_count ]
+}
+
+lc_port_count_get()
+{
+	local lc=$1
+
+	devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l
+}
+
+lc_wait_until_port_count_is()
+{
+	local lc=$1
+	local port_count=$2
+	local timeout=$3 # ms
+
+	busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
+}
+
+PROV_UNPROV_TIMEOUT=8000 # ms
+POST_PROV_ACT_TIMEOUT=2000 # ms
+PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
+
+unprovision_one()
+{
+	local lc=$1
+	local state
+
+	state=$(lc_state_get $lc)
+	check_err $? "Failed to get state of linecard $lc"
+	if [[ "$state" == "unprovisioned" ]]; then
+		return
+	fi
+
+	log_info "Unprovisioning linecard $lc"
+
+	devlink lc set $DEVLINK_DEV lc $lc notype
+	check_err $? "Failed to trigger linecard $lc unprovisioning"
+
+	state=$(lc_wait_until_state_changes $lc "unprovisioning" \
+		$PROV_UNPROV_TIMEOUT)
+	check_err $? "Failed to unprovision linecard $lc (timeout)"
+
+	[ "$state" == "unprovisioned" ]
+	check_err $? "Failed to unprovision linecard $lc (state=$state)"
+}
+
+provision_one()
+{
+	local lc=$1
+	local type=$2
+	local state provisioned_type # keep the queried type out of the global scope
+
+	log_info "Provisioning linecard $lc"
+
+	devlink lc set $DEVLINK_DEV lc $lc type $type
+	check_err $? "Failed trigger linecard $lc provisioning"
+
+	state=$(lc_wait_until_state_changes $lc "provisioning" \
+		$PROV_UNPROV_TIMEOUT)
+	check_err $? "Failed to provision linecard $lc (timeout)"
+
+	[ "$state" == "provisioned" ] || [ "$state" == "active" ]
+	check_err $? "Failed to provision linecard $lc (state=$state)"
+
+	provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type")
+	[ "$provisioned_type" == "$type" ]
+	check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")"
+
+	# Wait for possible activation to make sure the state
+	# won't change after return from this function.
+	state=$(lc_wait_until_state_becomes $lc "active" \
+		$POST_PROV_ACT_TIMEOUT)
+}
+
+unprovision_test()
+{
+	RET=0
+	local lc
+
+	lc=$LC_SLOT
+	unprovision_one $lc
+	log_test "Unprovision"
+}
+
+LC_16X100G_TYPE="16x100G"
+LC_16X100G_PORT_COUNT=16
+
+supported_types_check()
+{
+	local lc=$1
+	local supported_types_count
+	local type_index
+	local lc_16x100_found=false
+
+	supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+				jq -e -r ".[][][].supported_types | length")
+	[ $supported_types_count != 0 ]
+	check_err $? "No supported types found for linecard $lc"
+	for (( type_index=0; type_index<$supported_types_count; type_index++ ))
+	do
+		type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+		       jq -e -r ".[][][].supported_types[$type_index]")
+		if [[ "$type" == "$LC_16X100G_TYPE" ]]; then
+			lc_16x100_found=true
+			break
+		fi
+	done
+	[ $lc_16x100_found = true ]
+	check_err $? "16X100G not found between supported types of linecard $lc"
+}
+
+ports_check()
+{
+	local lc=$1
+	local expected_port_count=$2
+	local port_count
+
+	port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \
+		$PROV_PORTS_INSTANTIATION_TIMEOUT)
+	[ $port_count != 0 ]
+	check_err $? "No port associated with linecard $lc"
+	[ $port_count == $expected_port_count ]
+	check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
+}
+
+provision_test()
+{
+	RET=0
+	local lc
+	local type
+	local state
+
+	lc=$LC_SLOT
+	supported_types_check $lc
+	state=$(lc_state_get $lc)
+	check_err $? "Failed to get state of linecard $lc"
+	if [[ "$state" != "unprovisioned" ]]; then
+		unprovision_one $lc
+	fi
+	provision_one $lc $LC_16X100G_TYPE
+	ports_check $lc $LC_16X100G_PORT_COUNT
+	log_test "Provision"
+}
+
+ACTIVATION_TIMEOUT=20000 # ms
+
+interface_check()
+{
+	ip link set $h1 up
+	ip link set $h2 up
+	ifaces_upped=true
+	setup_wait
+}
+
+activation_16x100G_test()
+{
+	RET=0
+	local lc
+	local type
+	local state
+
+	lc=$LC_SLOT
+	type=$LC_16X100G_TYPE
+
+	unprovision_one $lc
+	provision_one $lc $type
+	state=$(lc_wait_until_state_becomes $lc "active" \
+		$ACTIVATION_TIMEOUT)
+	check_err $? "Failed to get linecard $lc activated (timeout)"
+
+	interface_check
+
+	log_test "Activation 16x100G"
+}
+
+setup_prepare()
+{
+	local lc_num # declared apart from the assignment so "$?" is the pipeline's status
+	lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length")
+	if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then
+		echo "SKIP: No linecard support found"
+		exit $ksft_skip
+	fi
+
+	if [ -z "$LC_SLOT" ]; then
+		echo "SKIP: \"LC_SLOT\" variable not provided"
+		exit $ksft_skip
+	fi
+
+	# Interfaces are not present during the script start, that's why we define
+	# NUM_NETIFS here so dummy implicit veth pairs are not created.
+	NUM_NETIFS=2
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+	ifaces_upped=false
+}
+
+cleanup()
+{
+	if [ "$ifaces_upped" = true ] ; then
+		ip link set $h1 down
+		ip link set $h2 down
+	fi
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
new file mode 100755
index 000000000000..82a47b903f92
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
@@ -0,0 +1,480 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends 1Gbps of traffic through the switch, into which it then
+# injects a burst of traffic and tests that there are no drops.
+#
+# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream
+# ingresses through $swp1, and is forwarded through a small temporary pool to a
+# 1Gbps $swp3.
+#
+# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to
+# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog
+# is generated.
+#
+# At this point, a burst of traffic is forwarded from H3. This enters $swp5, is
+# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The
+# expectation is that the burst is wholly absorbed by the large pool and no
+# drops are caused. After the burst, there should be a backlog that is hard to
+# get rid of, because $swp2 is fully subscribed. But because each individual
+# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the
+# test.
+#
+# +-----------------------+                           +-----------------------+
+# | H1                    |                           | H3                    |
+# |   + $h1.111           |                           |          $h3.111 +    |
+# |   | 192.0.2.33/28     |                           |    192.0.2.35/28 |    |
+# |   |                   |                           |                  |    |
+# |   + $h1               |                           |              $h3 +    |
+# +---|-------------------+  +--------------------+   +------------------|----+
+#     |                      |                    |                      |
+# +---|----------------------|--------------------|----------------------|----+
+# |   + $swp1          $swp3 +                    + $swp4          $swp5 |    |
+# |   | iPOOL1        iPOOL0 |                    | iPOOL2        iPOOL2 |    |
+# |   | ePOOL4        ePOOL5 |                    | ePOOL4        ePOOL4 |    |
+# |   |                1Gbps |                    | 1Gbps                |    |
+# | +-|----------------------|-+                +-|----------------------|-+  |
+# | | + $swp1.111  $swp3.111 + |                | + $swp4.111  $swp5.111 + |  |
+# | |                          |                |                          |  |
+# | | BR1                      |                | BR2                      |  |
+# | |                          |                |                          |  |
+# | |                          |                |         + $swp2.111      |  |
+# | +--------------------------+                +---------|----------------+  |
+# |                                                       |                   |
+# | iPOOL0: 500KB dynamic                                 |                   |
+# | iPOOL1: 500KB dynamic                                 |                   |
+# | iPOOL2: 50MB static                                   + $swp2             |
+# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
+# | ePOOL5: 500KB dynamic                                 | ePOOL6            |
+# | ePOOL6: 50MB static                                   | 1Gbps             |
+# +-------------------------------------------------------|-------------------+
+#                                                         |
+#                                                     +---|-------------------+
+#                                                     |   + $h2            H2 |
+#                                                     |   | 1Gbps             |
+#                                                     |   |                   |
+#                                                     |   + $h2.111           |
+#                                                     |     192.0.2.34/28     |
+#                                                     +-----------------------+
+#
+# iPOOL0+ePOOL4 are helper pools for control traffic etc.
+# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream
+# iPOOL2+ePOOL6 are pools for soaking the burst traffic
+
+ALL_TESTS="
+	ping_ipv4
+	test_8K
+	test_800
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=8
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+source mlxsw_lib.sh
+
+_1KB=1000
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+
+# The failure mode that this specifically tests is exhaustion of descriptor
+# buffer. The point is to produce a burst that shared buffer should be able
+# to accommodate, but produce it with small enough packets that the machine
+# runs out of the descriptor buffer space with default configuration.
+#
+# The machine therefore needs to be able to produce line rate with as small
+# packets as possible, and at the same time have large enough buffer that
+# when filled with these small packets, it runs out of descriptors.
+# Spectrum-2 is very close, but cannot perform this test. Therefore use
+# Spectrum-3 as a minimum, and permit larger burst size, and therefore
+# larger packets, to reduce spurious failures.
+#
+mlxsw_only_on_spectrum 3+ || exit
+
+BURST_SIZE=$((50000000))
+POOL_SIZE=$BURST_SIZE
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+	ethtool -s $h2 speed 1000 autoneg off
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	ethtool -s $h2 autoneg on
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	mtu_set $h3 10000
+
+	vlan_create $h3 111 v$h3 192.0.2.35/28
+}
+
+h3_destroy()
+{
+	vlan_destroy $h3 111
+
+	mtu_restore $h3
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	# pools
+	# -----
+
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_save 5
+	devlink_pool_size_thtype_save 2
+	devlink_pool_size_thtype_save 6
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_save $swp2 6
+	devlink_port_pool_th_save $swp3 5
+	devlink_port_pool_th_save $swp4 2
+	devlink_port_pool_th_save $swp5 2
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_save $swp4 1 ingress
+	devlink_tc_bind_pool_th_save $swp5 1 ingress
+
+	# Control traffic pools. Just reduce the size.
+	devlink_pool_size_thtype_set 0 dynamic $_500KB
+	devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+	# Stream modeling pools.
+	devlink_pool_size_thtype_set 1 dynamic $_500KB
+	devlink_pool_size_thtype_set 5 dynamic $_500KB
+
+	# Burst soak pools.
+	devlink_pool_size_thtype_set 2 static $POOL_SIZE
+	devlink_pool_size_thtype_set 6 static $POOL_SIZE
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+	vlan_create $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp1 1 16
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+
+	# Configure qdisc...
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	# ... so that we can assign prio1 traffic to PG1.
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+	ethtool -s $swp2 speed 1000 autoneg off
+	vlan_create $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp2 6 $POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp2 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# $swp3
+	# -----
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	ethtool -s $swp3 speed 1000 autoneg off
+	vlan_create $swp3 111
+	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp3 5 16
+	devlink_tc_bind_pool_th_set $swp3 1 egress 5 16
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp3 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# $swp4
+	# -----
+
+	ip link set dev $swp4 up
+	mtu_set $swp4 10000
+	ethtool -s $swp4 speed 1000 autoneg off
+	vlan_create $swp4 111
+	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp4 2 $POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE
+
+	# Configure qdisc...
+	tc qdisc replace dev $swp4 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	# ... so that we can assign prio1 traffic to PG1.
+	dcb buffer set dev $swp4 prio-buffer all:0 1:1
+
+	# $swp5
+	# -----
+
+	ip link set dev $swp5 up
+	mtu_set $swp5 10000
+	vlan_create $swp5 111
+	ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp5 2 $POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE
+
+	# Configure qdisc...
+	tc qdisc replace dev $swp5 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	# ... so that we can assign prio1 traffic to PG1.
+	dcb buffer set dev $swp5 prio-buffer all:0 1:1
+
+	# bridges
+	# -------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br1
+	ip link set dev $swp3.111 master br1
+	ip link set dev br1 up
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev $swp2.111 master br2
+	ip link set dev $swp4.111 master br2
+	ip link set dev $swp5.111 master br2
+	ip link set dev br2 up
+}
+
+switch_destroy()
+{
+	# Do this first so that we can reset the limits to values that are only
+	# valid for the original static / dynamic setting.
+	devlink_pool_size_thtype_restore 6
+	devlink_pool_size_thtype_restore 5
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 2
+	devlink_pool_size_thtype_restore 1
+	devlink_pool_size_thtype_restore 0
+
+	# bridges
+	# -------
+
+	ip link set dev br2 down
+	ip link set dev $swp5.111 nomaster
+	ip link set dev $swp4.111 nomaster
+	ip link set dev $swp2.111 nomaster
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link set dev $swp3.111 nomaster
+	ip link set dev $swp1.111 nomaster
+	ip link del dev br1
+
+	# $swp5
+	# -----
+
+	dcb buffer set dev $swp5 prio-buffer all:0
+	tc qdisc del dev $swp5 root
+
+	devlink_tc_bind_pool_th_restore $swp5 1 ingress
+	devlink_port_pool_th_restore $swp5 2
+
+	vlan_destroy $swp5 111
+	mtu_restore $swp5
+	ip link set dev $swp5 down
+
+	# $swp4
+	# -----
+
+	dcb buffer set dev $swp4 prio-buffer all:0
+	tc qdisc del dev $swp4 root
+
+	devlink_tc_bind_pool_th_restore $swp4 1 ingress
+	devlink_port_pool_th_restore $swp4 2
+
+	vlan_destroy $swp4 111
+	ethtool -s $swp4 autoneg on
+	mtu_restore $swp4
+	ip link set dev $swp4 down
+
+	# $swp3
+	# -----
+
+	tc qdisc del dev $swp3 root
+
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+	devlink_port_pool_th_restore $swp3 5
+
+	vlan_destroy $swp3 111
+	ethtool -s $swp3 autoneg on
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	# $swp2
+	# -----
+
+	tc qdisc del dev $swp2 root
+
+	devlink_tc_bind_pool_th_restore $swp2 1 egress
+	devlink_port_pool_th_restore $swp2 6
+
+	vlan_destroy $swp2 111
+	ethtool -s $swp2 autoneg on
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	# $swp1
+	# -----
+
+	dcb buffer set dev $swp1 prio-buffer all:0
+	tc qdisc del dev $swp1 root
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 1
+
+	vlan_destroy $swp1 111
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	swp4=${NETIFS[p6]}
+
+	swp5=${NETIFS[p7]}
+	h3=${NETIFS[p8]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " h1->h2"
+	ping_test $h3 192.0.2.34 " h3->h2"
+}
+
+__test_qos_burst()
+{
+	local pktsize=$1; shift
+
+	RET=0
+
+	start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+	sleep 1
+
+	local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+	((q0 == 0))
+	check_err $? "Transmit queue non-zero?"
+
+	local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+	local cell_size=$(devlink_cell_size_get)
+	local cells=$((BURST_SIZE / cell_size))
+	# Each packet is $pktsize of payload + headers.
+	local pkt_cells=$(((pktsize + 50 + cell_size - 1)  / cell_size))
+	# How many packets can we admit:
+	local pkts=$((cells / pkt_cells))
+
+	$MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \
+		-a own -b $h2mac -c $pkts -t udp -q
+	sleep 1
+
+	local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+	((d1 == d0))
+	check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+	# Check that the queue is somewhat close to the burst size. This
+	# makes sure that the lack of drops above was not due to port
+	# undersubscription.
+	local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+	local qe=$((90 * BURST_SIZE / 100))
+	((q0 > qe))
+	check_err $? "Queue size expected >$qe, got $q0"
+
+	stop_traffic
+	sleep 2
+
+	log_test "Burst: absorb $pkts ${pktsize}-B packets"
+}
+
+test_8K()
+{
+	__test_qos_burst 8000
+}
+
+test_800()
+{
+	__test_qos_burst 800
+}
+
+bail_on_lldpad
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
index f4493ef9cca1..3569ff45f7d5 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -371,9 +371,9 @@ test_tc_int_buf()
 	tc qdisc delete dev $swp root
 }
 
-trap cleanup EXIT
-
 bail_on_lldpad
+
+trap cleanup EXIT
 setup_wait
 tests_run
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
index 5d5622fc2758..f9858e221996 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -393,9 +393,9 @@ test_qos_pfc()
 	log_test "PFC"
 }
 
-trap cleanup EXIT
-
 bail_on_lldpad
+
+trap cleanup EXIT
 setup_prepare
 setup_wait
 tests_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1e5ad3209436..7a73057206cd 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -166,12 +166,11 @@ ecn_mirror_test()
 	uninstall_qdisc
 }
 
-trap cleanup EXIT
+bail_on_lldpad
 
+trap cleanup EXIT
 setup_prepare
 setup_wait
-
-bail_on_lldpad
 tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index d79a82f317d2..501d192529ac 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -73,12 +73,11 @@ red_mirror_test()
 	uninstall_qdisc
 }
 
-trap cleanup EXIT
+bail_on_lldpad
 
+trap cleanup EXIT
 setup_prepare
 setup_wait
-
-bail_on_lldpad
 tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
index fe1898402987..cba5ac08426b 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
@@ -319,11 +319,11 @@ counter_test()
 	((pkts < 10))
 	check_err $? "$type stats show >= 10 packets after first enablement"
 
-	sleep 2
+	sleep 2.5
 
 	local pkts=$(get_hwstat dummy1 l3 rx.packets)
 	((pkts >= 20))
-	check_err $? "$type stats show < 20 packets after 2s passed"
+	check_err $? "$type stats show < 20 packets after 2.5s passed"
 
 	$IP stats set dev dummy1 ${type}_stats off
 
diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
new file mode 100755
index 000000000000..c51c83421c61
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 NXP
+
+# The script is mostly generic, with the exception of the
+# ethtool per-TC counter names ("rx_green_prio_${tc}")
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h1_vlan_create()
+{
+	local vid=$1
+
+	vlan_create $h1 $vid
+	simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+	ip link set $h1.$vid type vlan \
+		egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+		ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h1_vlan_destroy()
+{
+	local vid=$1
+
+	simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+	vlan_destroy $h1 $vid
+}
+
+h2_vlan_create()
+{
+	local vid=$1
+
+	vlan_create $h2 $vid
+	simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+	ip link set $h2.$vid type vlan \
+		egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+		ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h2_vlan_destroy()
+{
+	local vid=$1
+
+	simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+	vlan_destroy $h2 $vid
+}
+
+vlans_prepare()
+{
+	h1_vlan_create 100
+	h2_vlan_create 100
+
+	tc qdisc add dev ${h1}.100 clsact
+	tc filter add dev ${h1}.100 egress protocol ipv4 \
+		flower ip_proto icmp action skbedit priority 3
+	tc filter add dev ${h1}.100 egress protocol ipv6 \
+		flower ip_proto icmpv6 action skbedit priority 3
+}
+
+vlans_destroy()
+{
+	tc qdisc del dev ${h1}.100 clsact
+
+	h1_vlan_destroy 100
+	h2_vlan_destroy 100
+}
+
+switch_create()
+{
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+
+	# Ports should trust VLAN PCP even with vlan_filtering=0
+	ip link add br0 type bridge
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+}
+
+switch_destroy()
+{
+	ip link del br0
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+dscp_cs_to_tos()
+{
+	local dscp_cs=$1
+
+	# https://datatracker.ietf.org/doc/html/rfc2474
+	# 4.2.2.1  The Class Selector Codepoints
+	echo $((${dscp_cs} << 5))
+}
+
+run_test()
+{
+	local test_name=$1; shift
+	local if_name=$1; shift
+	local tc=$1; shift
+	local tos=$1; shift
+	local counter_name="rx_green_prio_${tc}"
+	local ipv4_before
+	local ipv4_after
+	local ipv6_before
+	local ipv6_after
+
+	ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+	ping_do ${if_name} $H2_IPV4 "-Q ${tos}"
+	ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+	if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	log_test "IPv4 ${test_name}"
+
+	ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+	ping_do ${if_name} $H2_IPV6 "-Q ${tos}"
+	ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+	if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then
+		RET=1
+	else
+		RET=0
+	fi
+	log_test "IPv6 ${test_name}"
+}
+
+port_default_prio_get()
+{
+	local if_name=$1
+	local prio
+
+	prio="$(dcb -j app show dev ${if_name} default-prio | \
+		jq '.default_prio[]')"
+	if [ -z "${prio}" ]; then
+		prio=0
+	fi
+
+	echo ${prio}
+}
+
+test_port_default()
+{
+	# Save the current port-default priority so it can be restored below.
+	local orig=$(port_default_prio_get ${swp1})
+
+	dcb app replace dev ${swp1} default-prio 5
+
+	run_test "Port-default QoS classification" ${h1} 5 0
+
+	dcb app replace dev ${swp1} default-prio ${orig}
+}
+
+test_vlan_pcp()
+{
+	vlans_prepare
+
+	run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0
+
+	vlans_destroy
+}
+
+test_ip_dscp()
+{
+	local port_default=$(port_default_prio_get ${swp1})
+	local tos=$(dscp_cs_to_tos 4)
+
+	dcb app add dev ${swp1} dscp-prio CS4:4
+	run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos}
+	dcb app del dev ${swp1} dscp-prio CS4:4
+
+	vlans_prepare
+	run_test "Untrusted DSCP QoS classification follows VLAN PCP" \
+		${h1}.100 3 ${tos}
+	vlans_destroy
+
+	run_test "Untrusted DSCP QoS classification follows port default" \
+		${h1} ${port_default} ${tos}
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_port_default
+	test_vlan_pcp
+	test_ip_dscp
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
new file mode 100755
index 000000000000..5a5cee92c665
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+# Note: On LS1028A, for lack of enough user ports, this setup requires patching
+# the device tree to use the second CPU port as a user port
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tsn_lib.sh
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+# Tunables
+NUM_PKTS=1000
+STREAM_VID=100
+STREAM_PRIO=6
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2))
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Chain number exported by the ocelot driver for
+# Per-Stream Filtering and Policing filters
+PSFP()
+{
+	echo 30000
+}
+
+psfp_chain_create()
+{
+	local if_name=$1
+
+	tc qdisc add dev $if_name clsact
+
+	tc filter add dev $if_name ingress chain 0 pref 49152 flower \
+		skip_sw action goto chain $(PSFP)
+}
+
+psfp_chain_destroy()
+{
+	local if_name=$1
+
+	tc qdisc del dev $if_name clsact
+}
+
+psfp_filter_check()
+{
+	local expected=$1
+	local packets=""
+	local drops=""
+	local stats=""
+
+	stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1)
+	packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets")
+	drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops")
+
+	if ! [ "${packets}" = "${expected}" ]; then
+		printf "Expected filter to match on %d packets but matched on %d instead\n" \
+			"${expected}" "${packets}"
+	fi
+
+	echo "Hardware filter reports ${drops} drops"
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+	local h2_mac_addr=$(mac_get $h2)
+
+	ip link set ${swp1} up
+	ip link set ${swp2} up
+
+	ip link add br0 type bridge vlan_filtering 1
+	ip link set ${swp1} master br0
+	ip link set ${swp2} master br0
+	ip link set br0 up
+
+	bridge vlan add dev ${swp2} vid ${STREAM_VID}
+	bridge vlan add dev ${swp1} vid ${STREAM_VID}
+	# PSFP on Ocelot requires the filter to also be added to the bridge
+	# FDB, and not be removed
+	bridge fdb add dev ${swp2} \
+		${h2_mac_addr} vlan ${STREAM_VID} static master
+
+	psfp_chain_create ${swp1}
+
+	tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \
+		protocol 802.1Q flower skip_sw \
+		dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \
+		action gate base-time 0.000000000 \
+		sched-entry OPEN  ${GATE_DURATION_NS} -1 -1 \
+		sched-entry CLOSE ${GATE_DURATION_NS} -1 -1
+}
+
+switch_destroy()
+{
+	psfp_chain_destroy ${swp1}
+	ip link del br0
+}
+
+txtime_setup()
+{
+	local if_name=$1
+
+	tc qdisc add dev ${if_name} clsact
+	# Classify PTP on TC 7 and isochron on TC 6
+	tc filter add dev ${if_name} egress protocol 0x88f7 \
+		flower action skbedit priority 7
+	tc filter add dev ${if_name} egress protocol 802.1Q \
+		flower vlan_ethtype 0xdead action skbedit priority 6
+	tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \
+		queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+		map 0 1 2 3 4 5 6 7 \
+		hw 1
+	# Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1.
+	tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \
+		clockid CLOCK_TAI offload delta ${FUDGE_FACTOR}
+}
+
+txtime_cleanup()
+{
+	local if_name=$1
+
+	tc qdisc del dev ${if_name} root
+	tc qdisc del dev ${if_name} clsact
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+
+	txtime_setup ${h1}
+
+	# Set up swp1 as a master PHC for h1, synchronized to the local
+	# CLOCK_REALTIME.
+	phc2sys_start ${swp1} ${UDS_ADDRESS_SWP1}
+
+	# Assumption true for LS1028A: h1 and h2 use the same PHC. So by
+	# synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
+	# to swp1 (and both to CLOCK_REALTIME).
+	ptp4l_start ${h1} true ${UDS_ADDRESS_H1}
+	ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1}
+
+	# Make sure there are no filter matches at the beginning of the test
+	psfp_filter_check 0
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ptp4l_stop ${swp1}
+	ptp4l_stop ${h1}
+	phc2sys_stop
+	isochron_recv_stop
+
+	txtime_cleanup ${h1}
+
+	h2_destroy
+	h1_destroy
+	switch_destroy
+
+	vrf_cleanup
+}
+
+debug_incorrectly_dropped_packets()
+{
+	local isochron_dat=$1
+	local dropped_seqids
+	local seqid
+
+	echo "Packets incorrectly dropped:"
+
+	dropped_seqids=$(isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "%u RX hw %T\n" \
+		--printf-args "qR" | \
+		grep 'RX hw 0.000000000' | \
+		awk '{print $1}')
+
+	for seqid in ${dropped_seqids}; do
+		isochron report \
+			--input-file "${isochron_dat}" \
+			--start ${seqid} --stop ${seqid} \
+			--printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \
+			--printf-args "qST"
+	done
+}
+
+debug_incorrectly_received_packets()
+{
+	local isochron_dat=$1
+
+	echo "Packets incorrectly received:"
+
+	isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \
+		--printf-args "qSTR" |
+		grep -v 'HW RX timestamp 0.000000000'
+}
+
+run_test()
+{
+	local base_time=$1
+	local expected=$2
+	local test_name=$3
+	local debug=$4
+	local isochron_dat="$(mktemp)"
+	local extra_args=""
+	local received
+
+	isochron_do \
+		"${h1}" \
+		"${h2}" \
+		"${UDS_ADDRESS_H1}" \
+		"" \
+		"${base_time}" \
+		"${CYCLE_TIME_NS}" \
+		"${SHIFT_TIME_NS}" \
+		"${NUM_PKTS}" \
+		"${STREAM_VID}" \
+		"${STREAM_PRIO}" \
+		"" \
+		"${isochron_dat}"
+
+	# Count all received packets by looking at the non-zero RX timestamps
+	received=$(isochron report \
+		--input-file "${isochron_dat}" \
+		--printf-format "%u\n" --printf-args "R" | \
+		grep -w -v '0' | wc -l)
+
+	if [ "${received}" = "${expected}" ]; then
+		RET=0
+	else
+		RET=1
+		echo "Expected isochron to receive ${expected} packets but received ${received}"
+	fi
+
+	log_test "${test_name}"
+
+	if [ "$RET" = "1" ]; then
+		${debug} "${isochron_dat}"
+	fi
+
+	rm ${isochron_dat} 2> /dev/null
+}
+
+test_gate_in_band()
+{
+	# Send packets in-band with the OPEN gate entry
+	run_test 0.000000000 ${NUM_PKTS} "In band" \
+		debug_incorrectly_dropped_packets
+
+	psfp_filter_check ${NUM_PKTS}
+}
+
+test_gate_out_of_band()
+{
+	# Send packets in-band with the CLOSE gate entry
+	run_test 0.005000000 0 "Out of band" \
+		debug_incorrectly_received_packets
+
+	psfp_filter_check $((2 * ${NUM_PKTS}))
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_gate_in_band
+	test_gate_out_of_band
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index 10e54bcca7a9..9c79bbcce5a8 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -4,35 +4,17 @@
 
 WAIT_TIME=1
 NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
 lib_dir=$(dirname $0)/../../../net/forwarding
 source $lib_dir/tc_common.sh
 source $lib_dir/lib.sh
 
 require_command tcpdump
 
-#
-#   +---------------------------------------------+
-#   |       DUT ports         Generator ports     |
-#   | +--------+ +--------+ +--------+ +--------+ |
-#   | |        | |        | |        | |        | |
-#   | |  eth0  | |  eth1  | |  eth2  | |  eth3  | |
-#   | |        | |        | |        | |        | |
-#   +-+--------+-+--------+-+--------+-+--------+-+
-#          |         |           |          |
-#          |         |           |          |
-#          |         +-----------+          |
-#          |                                |
-#          +--------------------------------+
-
-eth0=${NETIFS[p1]}
-eth1=${NETIFS[p2]}
-eth2=${NETIFS[p3]}
-eth3=${NETIFS[p4]}
-
-eth0_mac="de:ad:be:ef:00:00"
-eth1_mac="de:ad:be:ef:00:01"
-eth2_mac="de:ad:be:ef:00:02"
-eth3_mac="de:ad:be:ef:00:03"
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
 
 # Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
 # used by the kernel driver. The numbers are:
@@ -156,39 +138,39 @@ create_tcam_skeleton()
 
 setup_prepare()
 {
-	ip link set $eth0 up
-	ip link set $eth1 up
-	ip link set $eth2 up
-	ip link set $eth3 up
+	ip link set $swp1 up
+	ip link set $swp2 up
+	ip link set $h2 up
+	ip link set $h1 up
 
-	create_tcam_skeleton $eth0
+	create_tcam_skeleton $swp1
 
 	ip link add br0 type bridge
-	ip link set $eth0 master br0
-	ip link set $eth1 master br0
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
 	ip link set br0 up
 
-	ip link add link $eth3 name $eth3.100 type vlan id 100
-	ip link set $eth3.100 up
+	ip link add link $h1 name $h1.100 type vlan id 100
+	ip link set $h1.100 up
 
-	ip link add link $eth3 name $eth3.200 type vlan id 200
-	ip link set $eth3.200 up
+	ip link add link $h1 name $h1.200 type vlan id 200
+	ip link set $h1.200 up
 
-	tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \
+	tc filter add dev $swp1 ingress chain $(IS1 1) pref 1 \
 		protocol 802.1Q flower skip_sw vlan_id 100 \
 		action vlan pop \
 		action goto chain $(IS1 2)
 
-	tc filter add dev $eth0 egress chain $(ES0) pref 1 \
-		flower skip_sw indev $eth1 \
+	tc filter add dev $swp1 egress chain $(ES0) pref 1 \
+		flower skip_sw indev $swp2 \
 		action vlan push protocol 802.1Q id 100
 
-	tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \
+	tc filter add dev $swp1 ingress chain $(IS1 0) pref 2 \
 		protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
 		action skbedit priority 7 \
 		action goto chain $(IS1 1)
 
-	tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+	tc filter add dev $swp1 ingress chain $(IS2 0 0) pref 1 \
 		protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
 		action police rate 50mbit burst 64k conform-exceed drop/pipe \
 		action goto chain $(IS2 1 0)
@@ -196,150 +178,160 @@ setup_prepare()
 
 cleanup()
 {
-	ip link del $eth3.200
-	ip link del $eth3.100
-	tc qdisc del dev $eth0 clsact
+	ip link del $h1.200
+	ip link del $h1.100
+	tc qdisc del dev $swp1 clsact
 	ip link del br0
 }
 
 test_vlan_pop()
 {
-	printf "Testing VLAN pop..			"
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	RET=0
 
-	tcpdump_start $eth2
+	tcpdump_start $h2
 
 	# Work around Mausezahn VLAN builder bug
 	# (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
 	# an 8021q upper
-	$MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+	$MZ $h1.100 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $h2
 
-	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, ethertype IPv4"
+	check_err "$?" "untagged reception"
+
+	tcpdump_cleanup $h2
 
-	tcpdump_cleanup
+	log_test "VLAN pop"
 }
 
 test_vlan_push()
 {
-	printf "Testing VLAN push..			"
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
 
-	tcpdump_start $eth3.100
+	RET=0
 
-	$MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip
+	tcpdump_start $h1.100
+
+	$MZ $h2 -q -c 1 -p 64 -a $h2_mac -b $h1_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $h1.100
 
-	if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $h1.100 | grep -q "$h2_mac > $h1_mac"
+	check_err "$?" "tagged reception"
 
-	tcpdump_cleanup
+	tcpdump_cleanup $h1.100
+
+	log_test "VLAN push"
 }
 
 test_vlan_ingress_modify()
 {
-	printf "Testing ingress VLAN modification..		"
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
+
+	RET=0
 
 	ip link set br0 type bridge vlan_filtering 1
-	bridge vlan add dev $eth0 vid 200
-	bridge vlan add dev $eth0 vid 300
-	bridge vlan add dev $eth1 vid 300
+	bridge vlan add dev $swp1 vid 200
+	bridge vlan add dev $swp1 vid 300
+	bridge vlan add dev $swp2 vid 300
 
-	tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \
+	tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \
 		protocol 802.1Q flower skip_sw vlan_id 200 \
 		action vlan modify id 300 \
 		action goto chain $(IS2 0 0)
 
-	tcpdump_start $eth2
+	tcpdump_start $h2
 
-	$MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+	$MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $h2
 
-	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+	check_err "$?" "tagged reception"
 
-	tcpdump_cleanup
+	tcpdump_cleanup $h2
 
-	tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
+	tc filter del dev $swp1 ingress chain $(IS1 2) pref 3
 
-	bridge vlan del dev $eth0 vid 200
-	bridge vlan del dev $eth0 vid 300
-	bridge vlan del dev $eth1 vid 300
+	bridge vlan del dev $swp1 vid 200
+	bridge vlan del dev $swp1 vid 300
+	bridge vlan del dev $swp2 vid 300
 	ip link set br0 type bridge vlan_filtering 0
+
+	log_test "Ingress VLAN modification"
 }
 
 test_vlan_egress_modify()
 {
-	printf "Testing egress VLAN modification..		"
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
 
-	tc qdisc add dev $eth1 clsact
+	RET=0
+
+	tc qdisc add dev $swp2 clsact
 
 	ip link set br0 type bridge vlan_filtering 1
-	bridge vlan add dev $eth0 vid 200
-	bridge vlan add dev $eth1 vid 200
+	bridge vlan add dev $swp1 vid 200
+	bridge vlan add dev $swp2 vid 200
 
-	tc filter add dev $eth1 egress chain $(ES0) pref 3 \
+	tc filter add dev $swp2 egress chain $(ES0) pref 3 \
 		protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \
 		action vlan modify id 300 priority 7
 
-	tcpdump_start $eth2
+	tcpdump_start $h2
 
-	$MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+	$MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
 
 	sleep 1
 
-	tcpdump_stop
+	tcpdump_stop $h2
 
-	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
-		echo "OK"
-	else
-		echo "FAIL"
-	fi
+	tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+	check_err "$?" "tagged reception"
 
-	tcpdump_cleanup
+	tcpdump_cleanup $h2
 
-	tc filter del dev $eth1 egress chain $(ES0) pref 3
-	tc qdisc del dev $eth1 clsact
+	tc filter del dev $swp2 egress chain $(ES0) pref 3
+	tc qdisc del dev $swp2 clsact
 
-	bridge vlan del dev $eth0 vid 200
-	bridge vlan del dev $eth1 vid 200
+	bridge vlan del dev $swp1 vid 200
+	bridge vlan del dev $swp2 vid 200
 	ip link set br0 type bridge vlan_filtering 0
+
+	log_test "Egress VLAN modification"
 }
 
 test_skbedit_priority()
 {
+	local h1_mac=$(mac_get $h1)
+	local h2_mac=$(mac_get $h2)
 	local num_pkts=100
 
-	printf "Testing frame prioritization..		"
+	before=$(ethtool_stats_get $swp1 'rx_green_prio_7')
 
-	before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+	$MZ $h1 -q -c $num_pkts -p 64 -a $h1_mac -b $h2_mac -t ip -A 10.1.1.2
 
-	$MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
-
-	after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+	after=$(ethtool_stats_get $swp1 'rx_green_prio_7')
 
 	if [ $((after - before)) = $num_pkts ]; then
-		echo "OK"
+		RET=0
 	else
-		echo "FAIL"
+		RET=1
 	fi
+
+	log_test "Frame prioritization"
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 21a411b04890..b984f8c8d523 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -5,6 +5,7 @@ socket
 psock_fanout
 psock_snd
 psock_tpacket
+stress_reuseport_listen
 reuseport_addr_any
 reuseport_bpf
 reuseport_bpf_cpu
@@ -36,3 +37,4 @@ gro
 ioam6_parser
 toeplitz
 cmsg_sender
+bind_bhash_test
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index e1f998defd10..464df13831f2 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -37,6 +37,8 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
 TEST_PROGS += arp_ndisc_evict_nocarrier.sh
+TEST_PROGS += ndisc_unsolicited_na_test.sh
+TEST_PROGS += stress_reuseport_listen.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
@@ -55,7 +57,9 @@ TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 TEST_GEN_FILES += toeplitz
 TEST_GEN_FILES += cmsg_sender
+TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += bind_bhash_test
 
 TEST_FILES := settings
 
@@ -66,4 +70,5 @@ include bpf/Makefile
 
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash_test.c b/tools/testing/selftests/net/bind_bhash_test.c
new file mode 100644
index 000000000000..252e73754e76
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This times how long it takes to bind to a port when the port already
+ * has multiple sockets in its bhash table.
+ *
+ * In the setup(), we populate the port's bhash table with
+ * MAX_THREADS * MAX_CONNECTIONS number of entries.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <time.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <sys/socket.h>
+
+#define MAX_THREADS 600
+#define MAX_CONNECTIONS 40
+
+static const char *bind_addr = "::1";
+static const char *port;
+
+static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
+
+/*
+ * Create an IPv6 TCP socket and bind it to addr on the global port.
+ *
+ * If opt is non-zero it is passed to setsockopt() as a SOL_SOCKET option
+ * name and enabled before binding.
+ * NOTE(review): callers pass SO_REUSEADDR | SO_REUSEPORT, which is used as
+ * one option name rather than two separate options - confirm intent.
+ *
+ * Returns the bound socket fd, or -1 on failure.
+ */
+static int bind_socket(int opt, const char *addr)
+{
+	struct addrinfo *res, hint = {};
+	int sock_fd, reuse = 1, err;
+
+	sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		perror("socket fd err");
+		return -1;
+	}
+
+	hint.ai_family = AF_INET6;
+	hint.ai_socktype = SOCK_STREAM;
+
+	err = getaddrinfo(addr, port, &hint, &res);
+	if (err) {
+		/* getaddrinfo() reports errors via its return value, not errno */
+		fprintf(stderr, "getaddrinfo failed: %s\n", gai_strerror(err));
+		close(sock_fd);
+		return -1;
+	}
+
+	if (opt) {
+		err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
+		if (err) {
+			perror("setsockopt failed");
+			goto err_out;
+		}
+	}
+
+	err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
+	if (err) {
+		perror("failed to bind to port");
+		goto err_out;
+	}
+
+	freeaddrinfo(res);
+	return sock_fd;
+
+err_out:
+	freeaddrinfo(res);
+	close(sock_fd);
+	return -1;
+}
+
+/*
+ * Thread body: bind MAX_CONNECTIONS sockets to bind_addr, storing each fd
+ * in the int array passed through arg. Stops at the first failure.
+ */
+static void *setup(void *arg)
+{
+	int sock_fd, i;
+	int *array = (int *)arg;
+
+	for (i = 0; i < MAX_CONNECTIONS; i++) {
+		sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
+		if (sock_fd < 0)
+			return NULL;
+		array[i] = sock_fd;
+	}
+
+	return NULL;
+}
+
+/*
+ * Takes the port as the only argument. Populates the port's bhash entry,
+ * then times one more bind to the same port on a different address.
+ */
+int main(int argc, const char *argv[])
+{
+	int listener_fd, sock_fd, i, j, nr_threads;
+	pthread_t tid[MAX_THREADS];
+	clock_t begin, end;
+
+	if (argc != 2) {
+		printf("Usage: listener <port>\n");
+		return -1;
+	}
+
+	port = argv[1];
+
+	listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
+	if (listener_fd < 0)
+		return -1;
+
+	if (listen(listener_fd, 100) < 0) {
+		perror("listen failed");
+		close(listener_fd);
+		return -1;
+	}
+
+	/* Set up threads to populate the bhash table entry for the port */
+	for (nr_threads = 0; nr_threads < MAX_THREADS; nr_threads++) {
+		if (pthread_create(&tid[nr_threads], NULL, setup,
+				   fd_array[nr_threads])) {
+			fprintf(stderr, "pthread_create failed\n");
+			break;
+		}
+	}
+
+	/* Only join the threads that were actually created */
+	for (i = 0; i < nr_threads; i++)
+		pthread_join(tid[i], NULL);
+
+	begin = clock();
+
+	/* Bind to the same port on a different address */
+	sock_fd  = bind_socket(0, "2001:0db8:0:f101::1");
+
+	end = clock();
+
+	printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
+
+	/* clean up */
+	close(sock_fd);
+	close(listener_fd);
+	for (i = 0; i < MAX_THREADS; i++) {
+		for (j = 0; j < MAX_CONNECTIONS; j++) {
+			/* Slots never filled are still 0 (static storage); skip them */
+			if (fd_array[i][j] > 0)
+				close(fd_array[i][j]);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b3bf5319bb0e..d5a0dd548989 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -56,6 +56,7 @@ TESTS="${ALL_TESTS}"
 VERBOSE=0
 PAUSE_ON_FAIL=no
 PAUSE=no
+PING_TIMEOUT=5
 
 nsid=100
 
@@ -882,13 +883,13 @@ ipv6_fcnal_runtime()
 	log_test $? 0 "Route delete"
 
 	run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping with nexthop"
 
 	run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 81/82"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping - multipath"
 
 	#
@@ -896,26 +897,26 @@ ipv6_fcnal_runtime()
 	#
 	run_cmd "$IP -6 nexthop add id 83 blackhole"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP -6 nexthop replace id 83 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 83"
-		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 81/82"
-		run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1003,10 +1004,10 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
 	run_cmd "$IP nexthop add id 93 group 91/92"
 	run_cmd "$IP -6 ro add default nhid 91"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Nexthop with default route and rpfilter"
 	run_cmd "$IP -6 ro replace default nhid 93"
-	run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
 	log_test $? 0 "Nexthop with multipath default route and rpfilter"
 
 	# TO-DO:
@@ -1460,13 +1461,13 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Basic ping"
 
 	run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
 	run_cmd "$IP nexthop add id 122 group 21/22"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multipath"
 
 	run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
@@ -1477,7 +1478,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
 	run_cmd "$IP ro add default nhid 501"
 	run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh first"
 
 	# flip the order
@@ -1486,7 +1487,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
 	run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
 	run_cmd "$IP ro add default nhid 501 metric 20"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - multiple default routes, nh second"
 
 	run_cmd "$IP nexthop delete nhid 501"
@@ -1497,26 +1498,26 @@ ipv4_fcnal_runtime()
 	#
 	run_cmd "$IP nexthop add id 23 blackhole"
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 2 "Ping - blackhole"
 
 	run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "Ping - blackhole replaced with gateway"
 
 	run_cmd "$IP nexthop replace id 23 blackhole"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 2 "Ping - gateway replaced by blackhole"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	if [ $? -eq 0 ]; then
 		run_cmd "$IP nexthop replace id 122 group 23"
-		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 		log_test $? 2 "Ping - group with blackhole"
 
 		run_cmd "$IP nexthop replace id 122 group 21/22"
-		run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+		run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 		log_test $? 0 "Ping - group blackhole replaced with gateways"
 	else
 		log_test 2 0 "Ping - multipath failed"
@@ -1543,7 +1544,7 @@ ipv4_fcnal_runtime()
 	run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
 	set +e
 	run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1567,11 +1568,11 @@ ipv4_fcnal_runtime()
 
 	check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
 
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv6 nexthop with IPv4 route"
 
 	run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv4 route with IPv6 gateway"
 
 	$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1588,7 +1589,7 @@ ipv4_fcnal_runtime()
 
 	run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
 	run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
-	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
 	log_test $? 0 "IPv4 default route with IPv6 gateway"
 
 	#
@@ -2253,6 +2254,7 @@ usage: ${0##*/} OPTS
         -p          Pause on fail
         -P          Pause after each test before cleanup
         -v          verbose mode (show commands and output)
+	-w	    Timeout for ping
 
     Runtime test
 	-n num	    Number of nexthops to target
@@ -2265,7 +2267,7 @@ EOF
 ################################################################################
 # main
 
-while getopts :t:pP46hv o
+while getopts :t:pP46hvw: o	# only -t and -w take an argument; -v is a flag
 do
 	case $o in
 		t) TESTS=$OPTARG;;
@@ -2274,6 +2276,7 @@ do
 		p) PAUSE_ON_FAIL=yes;;
 		P) PAUSE=yes;;
 		v) VERBOSE=$(($VERBOSE + 1));;
+		w) PING_TIMEOUT=$OPTARG;;
 		h) usage; exit 0;;
 		*) usage; exit 1;;
 	esac
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 4f70baad867d..bbe3b379927a 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -20,6 +20,7 @@ SRC_IP6=2001:db8:1::3
 DEV_ADDR=192.51.100.1
 DEV_ADDR6=2001:db8:1::1
 DEV=dummy0
+TESTS="fib_rule6 fib_rule4"
 
 log_test()
 {
@@ -316,7 +317,16 @@ fi
 # start clean
 cleanup &> /dev/null
 setup
-run_fibrule_tests
+for t in $TESTS
+do
+	case $t in
+	fib_rule6_test|fib_rule6)		fib_rule6_test;;
+	fib_rule4_test|fib_rule4)		fib_rule4_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+
+	esac
+done
 cleanup
 
 if [ "$TESTS" != "none" ]; then
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index e811090f7748..8f481218a492 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -2,6 +2,7 @@
 
 TEST_PROGS = bridge_igmp.sh \
 	bridge_locked_port.sh \
+	bridge_mdb.sh \
 	bridge_mld.sh \
 	bridge_port_isolation.sh \
 	bridge_sticky_fdb.sh \
@@ -19,6 +20,7 @@ TEST_PROGS = bridge_igmp.sh \
 	gre_multipath_nh.sh \
 	gre_multipath.sh \
 	hw_stats_l3.sh \
+	hw_stats_l3_gre.sh \
 	ip6_forward_instats_vrf.sh \
 	ip6gre_custom_multipath_hash.sh \
 	ip6gre_flat_key.sh \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..b1ba6876dd86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that adding host mdb entries works as intended for all types of
+# multicast filters: ipv4, ipv6, and mac
+
+ALL_TESTS="mdb_add_del_test"
+NUM_NETIFS=2
+
+TEST_GROUP_IP4="225.1.2.3"
+TEST_GROUP_IP6="ff02::42"
+TEST_GROUP_MAC="01:00:01:c0:ff:ee"
+
+source lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_create()
+{
+	# Enable multicast filtering
+	ip link add dev br0 type bridge mcast_snooping 1
+
+	ip link set dev $swp1 master br0
+
+	ip link set dev br0 up
+	ip link set dev $swp1 up
+}
+
+switch_destroy()
+{
+	ip link set dev $swp1 down
+	ip link del dev br0
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	vrf_prepare
+
+	h1_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+do_mdb_add_del()
+{
+	local group=$1
+	local flag=$2
+
+	RET=0
+	bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null
+	check_err $? "Failed adding $group to br0, port br0"
+
+	if [ -z "$flag" ]; then
+	    flag="temp"
+	fi
+
+	bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null
+	check_err $? "$group not added with $flag flag"
+
+	bridge mdb del dev br0 port br0 grp $group 2>/dev/null
+	check_err $? "Failed deleting $group from br0, port br0"
+
+	bridge mdb show dev br0 | grep -q $group >/dev/null
+	check_err_fail 1 $? "$group still in mdb after delete"
+
+	log_test "MDB add/del group $group to bridge port br0"
+}
+
+mdb_add_del_test()
+{
+	do_mdb_add_del $TEST_GROUP_MAC permanent
+	do_mdb_add_del $TEST_GROUP_IP4
+	do_mdb_add_del $TEST_GROUP_IP6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
index 1c11c4256d06..9c1f76e108af 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -162,14 +162,6 @@ ping_ipv6()
 	ping_test $h1.200 2001:db8:2::1 " IPv6"
 }
 
-get_l3_stat()
-{
-	local selector=$1; shift
-
-	ip -j stats show dev $rp1.200 group offload subgroup l3_stats |
-		  jq '.[0].stats64.'$selector
-}
-
 send_packets_rx_ipv4()
 {
 	# Send 21 packets instead of 20, because the first one might trap and go
@@ -208,11 +200,11 @@ ___test_stats()
 	local a
 	local b
 
-	a=$(get_l3_stat ${dir}.packets)
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	send_packets_${dir}_${prot}
 	"$@"
 	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
-		       get_l3_stat ${dir}.packets)
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	check_err $? "Traffic not reflected in the counter: $a -> $b"
 }
 
@@ -281,11 +273,11 @@ __test_stats_report()
 
 	RET=0
 
-	a=$(get_l3_stat ${dir}.packets)
+	a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	send_packets_${dir}_${prot}
 	ip address flush dev $rp1.200
 	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
-		       get_l3_stat ${dir}.packets)
+		       hw_stats_get l3_stats $rp1.200 ${dir} packets)
 	check_err $? "Traffic not reflected in the counter: $a -> $b"
 	log_test "Test ${dir} packets: stats pushed on loss of L3"
 
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..eb9ec4a68f84
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+	ping_ipv4
+	test_stats_rx
+	test_stats_tx
+"
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	ol1mac=$(mac_get $ol1)
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create gre $ol1 $ul1
+	sw2_flat_create gre $ol2 $ul2
+	ip stats set dev g1a l3_stats on
+	ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip stats set dev g1a l3_stats off
+	ip stats set dev g2a l3_stats off
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+	forwarding_restore
+}
+
+ping_ipv4()
+{
+	RET=0
+
+	ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+	# Send 21 packets instead of 20, because the first one might trap and go
+	# through the SW datapath, which might not bump the HW counter.
+	$MZ $h1 -c 21 -d 20msec -p 100 \
+	    -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+	    -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+	local dev=$1; shift	# Netdevice whose l3_stats counters are read
+	local dir=$1; shift	# Counter direction passed to hw_stats_get (rx/tx)
+
+	local a			# Packet counter sampled before sending traffic
+	local b			# Output of the busywait on the same counter
+
+	RET=0
+
+	a=$(hw_stats_get l3_stats $dev $dir packets)
+	send_packets_ipv4
+	b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+		     hw_stats_get l3_stats $dev $dir packets)
+	check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+	log_test "Test $dir packets: ipv4"	# traffic comes from send_packets_ipv4
+}
+
+test_stats_tx()
+{
+	test_stats g1a tx
+}
+
+test_stats_rx()
+{
+	test_stats g2a rx
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 664b9ecaf228..37ae49d47853 100644..100755
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -27,6 +27,9 @@ INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
 LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
 REQUIRE_JQ=${REQUIRE_JQ:=yes}
 REQUIRE_MZ=${REQUIRE_MZ:=yes}
+REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
+STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
+TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -159,6 +162,12 @@ fi
 if [[ "$REQUIRE_MZ" = "yes" ]]; then
 	require_command $MZ
 fi
+if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
+	# https://github.com/vladimiroltean/mtools/
+	# patched for IPv6 support
+	require_command msend
+	require_command mreceive
+fi
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
@@ -214,10 +223,41 @@ create_netif()
 	esac
 }
 
+declare -A MAC_ADDR_ORIG
+mac_addr_prepare()
+{
+	local new_addr=
+	local dev=
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		dev=${NETIFS[p$i]}
+		new_addr=$(printf "00:01:02:03:04:%02x" $i)
+
+		MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
+		# Strip quotes
+		MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
+		ip link set dev $dev address $new_addr
+	done
+}
+
+mac_addr_restore()
+{
+	local dev=
+
+	for ((i = 1; i <= NUM_NETIFS; ++i)); do
+		dev=${NETIFS[p$i]}
+		ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
+	done
+}
+
 if [[ "$NETIF_CREATE" = "yes" ]]; then
 	create_netif
 fi
 
+if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+	mac_addr_prepare
+fi
+
 for ((i = 1; i <= NUM_NETIFS; ++i)); do
 	ip link show dev ${NETIFS[p$i]} &> /dev/null
 	if [[ $? -ne 0 ]]; then
@@ -503,6 +543,10 @@ pre_cleanup()
 		echo "Pausing before cleanup, hit any key to continue"
 		read
 	fi
+
+	if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+		mac_addr_restore
+	fi
 }
 
 vrf_prepare()
@@ -784,6 +828,17 @@ ipv6_stats_get()
 	cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2
 }
 
+hw_stats_get()
+{
+	local suite=$1; shift
+	local if_name=$1; shift
+	local dir=$1; shift
+	local stat=$1; shift
+
+	ip -j stats show dev $if_name group offload subgroup $suite |
+		jq ".[0].stats64.$dir.$stat"
+}
+
 humanize()
 {
 	local speed=$1; shift
@@ -824,6 +879,15 @@ mac_get()
 	ip -j link show dev $if_name | jq -r '.[]["address"]'
 }
 
+ipv6_lladdr_get()
+{
+	local if_name=$1
+
+	ip -j addr show dev $if_name | \
+		jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \
+		head -1
+}
+
 bridge_ageing_time_get()
 {
 	local bridge=$1
@@ -1322,25 +1386,40 @@ flood_test()
 
 __start_traffic()
 {
+	local pktsize=$1; shift
 	local proto=$1; shift
 	local h_in=$1; shift    # Where the traffic egresses the host
 	local sip=$1; shift
 	local dip=$1; shift
 	local dmac=$1; shift
 
-	$MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+	$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
 		-a own -b $dmac -t "$proto" -q "$@" &
 	sleep 1
 }
 
+start_traffic_pktsize()
+{
+	local pktsize=$1; shift
+
+	__start_traffic $pktsize udp "$@"
+}
+
+start_tcp_traffic_pktsize()
+{
+	local pktsize=$1; shift
+
+	__start_traffic $pktsize tcp "$@"
+}
+
 start_traffic()
 {
-	__start_traffic udp "$@"
+	start_traffic_pktsize 8000 "$@"
 }
 
 start_tcp_traffic()
 {
-	__start_traffic tcp "$@"
+	start_tcp_traffic_pktsize 8000 "$@"
 }
 
 stop_traffic()
@@ -1349,13 +1428,17 @@ stop_traffic()
 	{ kill %% && wait %%; } 2>/dev/null
 }
 
+declare -A cappid
+declare -A capfile
+declare -A capout
+
 tcpdump_start()
 {
 	local if_name=$1; shift
 	local ns=$1; shift
 
-	capfile=$(mktemp)
-	capout=$(mktemp)
+	capfile[$if_name]=$(mktemp)
+	capout[$if_name]=$(mktemp)
 
 	if [ -z $ns ]; then
 		ns_cmd=""
@@ -1369,27 +1452,35 @@ tcpdump_start()
 		capuser="-Z $SUDO_USER"
 	fi
 
-	$ns_cmd tcpdump -e -n -Q in -i $if_name \
-		-s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
-	cappid=$!
+	$ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
+		-s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
+		> "${capout[$if_name]}" 2>&1 &
+	cappid[$if_name]=$!
 
 	sleep 1
 }
 
 tcpdump_stop()
 {
-	$ns_cmd kill $cappid
+	local if_name=$1
+	local pid=${cappid[$if_name]}
+
+	$ns_cmd kill "$pid" && wait "$pid"
 	sleep 1
 }
 
 tcpdump_cleanup()
 {
-	rm $capfile $capout
+	local if_name=$1
+
+	rm ${capfile[$if_name]} ${capout[$if_name]}
 }
 
 tcpdump_show()
 {
-	tcpdump -e -n -r $capfile 2>&1
+	local if_name=$1
+
+	tcpdump -e -n -r ${capfile[$if_name]} 2>&1
 }
 
 # return 0 if the packet wasn't seen on host2_if or 1 if it was
@@ -1499,6 +1590,37 @@ brmcast_check_sg_state()
 	done
 }
 
+mc_join()
+{
+	local if_name=$1
+	local group=$2
+	local vrf_name=$(master_name_get $if_name)
+
+	# We don't care about actual reception, just about joining the
+	# IP multicast group and adding the L2 address to the device's
+	# MAC filtering table
+	ip vrf exec $vrf_name \
+		mreceive -g $group -I $if_name > /dev/null 2>&1 &
+	mreceive_pid=$!
+
+	sleep 1
+}
+
+mc_leave()
+{
+	kill "$mreceive_pid" && wait "$mreceive_pid"
+}
+
+mc_send()
+{
+	local if_name=$1
+	local groups=$2
+	local vrf_name=$(master_name_get $if_name)
+
+	ip vrf exec $vrf_name \
+		msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
+}
+
 start_ip_monitor()
 {
 	local mtype=$1; shift
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
new file mode 100755
index 000000000000..c5b0cbc85b3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone bridge"
+NUM_NETIFS=2
+PING_COUNT=1
+REQUIRE_MTOOLS=yes
+REQUIRE_MZ=no
+
+source lib.sh
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+BRIDGE_ADDR="00:00:de:ad:be:ee"
+MACVLAN_ADDR="00:00:de:ad:be:ef"
+UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03"
+UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04"
+UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05"
+JOINED_IPV4_MC_ADDR="225.1.2.3"
+UNKNOWN_IPV4_MC_ADDR1="225.1.2.4"
+UNKNOWN_IPV4_MC_ADDR2="225.1.2.5"
+UNKNOWN_IPV4_MC_ADDR3="225.1.2.6"
+JOINED_IPV6_MC_ADDR="ff2e::0102:0304"
+UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305"
+UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306"
+UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307"
+
+JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03"
+UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04"
+UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05"
+UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06"
+JOINED_MACV6_MC_ADDR="33:33:01:02:03:04"
+UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
+UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
+UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
+
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# Disable promisc to ensure we don't receive unknown MAC DA packets
+export TCPDUMP_EXTRA_FLAGS="-pl"
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+send_non_ip()
+{
+	local if_name=$1
+	local smac=$2
+	local dmac=$3
+
+	$MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+	ping_do $if_name $H2_IPV4
+	ip neigh del $H2_IPV4 dev $if_name
+}
+
+check_rcv()
+{
+	# $1: capture interface, $2: test description, $3: grep pattern,
+	# $4: "true" when a matching packet must be present in the capture
+	local if_name=$1 type=$2 pattern=$3 should_receive=$4
+	local should_fail=1 grep_status
+
+	if [ $should_receive = true ]; then
+		should_fail=0
+	fi
+	RET=0
+
+	tcpdump_show $if_name | grep -q "$pattern"
+	grep_status=$?
+	check_err_fail "$should_fail" "$grep_status" "reception"
+	log_test "$if_name: $type"
+}
+
+mc_route_prepare()
+{
+	local if_name=$1
+	local vrf_name=$(master_name_get $if_name)
+
+	ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name
+	ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+mc_route_destroy()
+{
+	local if_name=$1
+	local vrf_name=$(master_name_get $if_name)
+
+	ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name
+	ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+run_test()
+{
+	local rcv_if_name=$1
+	local smac=$(mac_get $h1)
+	local rcv_dmac=$(mac_get $rcv_if_name)
+
+	tcpdump_start $rcv_if_name
+
+	mc_route_prepare $h1
+	mc_route_prepare $rcv_if_name
+
+	send_uc_ipv4 $h1 $rcv_dmac
+	send_uc_ipv4 $h1 $MACVLAN_ADDR
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+
+	ip link set dev $rcv_if_name promisc on
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+	ip link set dev $rcv_if_name promisc off
+
+	mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
+	mc_send $h1 $JOINED_IPV4_MC_ADDR
+	mc_leave
+
+	mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
+	mc_send $h1 $JOINED_IPV6_MC_ADDR
+	mc_leave
+
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+
+	ip link set dev $rcv_if_name allmulticast on
+	send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
+	mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
+	mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+	ip link set dev $rcv_if_name allmulticast off
+
+	mc_route_destroy $rcv_if_name
+	mc_route_destroy $h1
+
+	sleep 1
+
+	tcpdump_stop $rcv_if_name
+
+	check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
+		"$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
+		"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+		"$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
+		"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
+		"$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
+		"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
+		"$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
+		"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+		false
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
+		true
+
+	check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
+		"$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
+		true
+
+	tcpdump_cleanup $rcv_if_name
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_create()
+{
+	ip link add br0 type bridge
+	ip link set br0 address $BRIDGE_ADDR
+	ip link set br0 up
+
+	ip link set $h2 master br0
+	ip link set $h2 up
+
+	simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_destroy()
+{
+	simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+
+	ip link del br0
+}
+
+standalone()
+{
+	h1_create
+	h2_create
+
+	ip link add link $h2 name macvlan0 type macvlan mode private
+	ip link set macvlan0 address $MACVLAN_ADDR
+	ip link set macvlan0 up
+
+	run_test $h2
+
+	ip link del macvlan0
+
+	h2_destroy
+	h1_destroy
+}
+
+bridge()
+{
+	h1_create
+	bridge_create
+
+	ip link add link br0 name macvlan0 type macvlan mode private
+	ip link set macvlan0 address $MACVLAN_ADDR
+	ip link set macvlan0 up
+
+	run_test br0
+
+	ip link del macvlan0
+
+	bridge_destroy
+	h1_destroy
+}
+
+cleanup()
+{
+	pre_cleanup
+	vrf_cleanup
+}
+
+setup_prepare()
+{
+	vrf_prepare
+	# setup_wait() needs this
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
new file mode 100755
index 000000000000..af3b398d13f0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone two_bridges one_bridge_two_pvids"
+NUM_NETIFS=4
+
+source lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p3]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+IPV4_ALLNODES="224.0.0.1"
+IPV6_ALLNODES="ff02::1"
+MACV4_ALLNODES="01:00:5e:00:00:01"
+MACV6_ALLNODES="33:33:00:00:00:01"
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# The full 4K VLAN space is too much to check, so strategically pick some
+# values which should provide reasonable coverage
+vids=(0 1 2 5 10 20 50 100 200 500 1000 2000 4000 4094)
+
+send_non_ip()
+{
+	local if_name=$1
+	local smac=$2
+	local dmac=$3
+
+	$MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+	ping_do $if_name $H2_IPV4
+	ip neigh del $H2_IPV4 dev $if_name
+}
+
+send_mc_ipv4()
+{
+	local if_name=$1
+
+	ping_do $if_name $IPV4_ALLNODES "-I $if_name"
+}
+
+send_uc_ipv6()
+{
+	local if_name=$1
+	local dmac=$2
+
+	ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name
+	ping6_do $if_name $H2_IPV6
+	ip -6 neigh del $H2_IPV6 dev $if_name
+}
+
+send_mc_ipv6()
+{
+	local if_name=$1
+
+	ping6_do $if_name $IPV6_ALLNODES%$if_name
+}
+
+check_rcv()
+{
+	local if_name=$1
+	local type=$2
+	local pattern=$3
+	local should_fail=1
+
+	RET=0
+
+	tcpdump_show $if_name | grep -q "$pattern"
+
+	check_err_fail "$should_fail" "$?" "reception"
+
+	log_test "$type"
+}
+
+run_test()
+{
+	local test_name="$1"
+	local smac=$(mac_get $h1)
+	local dmac=$(mac_get $h2)
+	local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1)
+	local vid=
+
+	echo "$test_name: Sending packets"
+
+	tcpdump_start $h2
+
+	send_non_ip $h1 $smac $dmac
+	send_non_ip $h1 $smac $NON_IP_MC
+	send_non_ip $h1 $smac $BC
+	send_uc_ipv4 $h1 $dmac
+	send_mc_ipv4 $h1
+	send_uc_ipv6 $h1 $dmac
+	send_mc_ipv6 $h1
+
+	for vid in "${vids[@]}"; do
+		vlan_create $h1 $vid
+		simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+
+		send_non_ip $h1.$vid $smac $dmac
+		send_non_ip $h1.$vid $smac $NON_IP_MC
+		send_non_ip $h1.$vid $smac $BC
+		send_uc_ipv4 $h1.$vid $dmac
+		send_mc_ipv4 $h1.$vid
+		send_uc_ipv6 $h1.$vid $dmac
+		send_mc_ipv6 $h1.$vid
+
+		simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+		vlan_destroy $h1 $vid
+	done
+
+	sleep 1
+
+	echo "$test_name: Checking which packets were received"
+
+	tcpdump_stop $h2
+
+	check_rcv $h2 "$test_name: Unicast non-IP untagged" \
+		"$smac > $dmac, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Multicast non-IP untagged" \
+		"$smac > $NON_IP_MC, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Broadcast non-IP untagged" \
+		"$smac > $BC, 802.3, length 4:"
+
+	check_rcv $h2 "$test_name: Unicast IPv4 untagged" \
+		"$smac > $dmac, ethertype IPv4 (0x0800)"
+
+	check_rcv $h2 "$test_name: Multicast IPv4 untagged" \
+		"$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES"
+
+	check_rcv $h2 "$test_name: Unicast IPv6 untagged" \
+		"$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6"
+
+	check_rcv $h2 "$test_name: Multicast IPv6 untagged" \
+		"$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES"
+
+	for vid in "${vids[@]}"; do
+		check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \
+			"$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \
+			"$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+		check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4"
+
+		check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \
+			"$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES"
+
+		check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \
+			"$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6"
+
+		check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \
+			"$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES"
+	done
+
+	tcpdump_cleanup $h2
+}
+
+standalone()
+{
+	run_test "Standalone switch ports"
+}
+
+two_bridges()
+{
+	ip link add br0 type bridge && ip link set br0 up
+	ip link add br1 type bridge && ip link set br1 up
+	ip link set $swp1 master br0
+	ip link set $swp2 master br1
+
+	run_test "Switch ports in different bridges"
+
+	ip link del br1
+	ip link del br0
+}
+
+one_bridge_two_pvids()
+{
+	ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0
+	ip link set br0 up
+	ip link set $swp1 master br0
+	ip link set $swp2 master br0
+	# Give each port its own PVID, so the two ports share no VLAN
+	bridge vlan add dev $swp1 vid 1 pvid untagged
+	bridge vlan add dev $swp2 vid 2 pvid untagged
+
+	run_test "Switch ports in VLAN-aware bridge with different PVIDs"
+
+	ip link del br0
+}
+
+h1_create()
+{
+	simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+setup_prepare()
+{
+	vrf_prepare
+
+	h1_create
+	h2_create
+	# we call simple_if_init from the test itself, but setup_wait expects
+	# that we call it from here, and waits until the interfaces are up
+	ip link set dev $swp1 up
+	ip link set dev $swp2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index 057f91b05098..b98ea9449b8b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,24 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |              $h1 + |                     | + $h2                |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64                             2001:db8:2::1/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
 ALL_TESTS="
 	ping_ipv4
 	ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh
index a7306c7ac06d..865c9f7d8143 100755
--- a/tools/testing/selftests/net/forwarding/router_vid_1.sh
+++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh
@@ -1,7 +1,32 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
-ALL_TESTS="ping_ipv4 ping_ipv6"
+# +--------------------+                     +----------------------+
+# | H1                 |                     |                   H2 |
+# |                    |                     |                      |
+# |            $h1.1 + |                     | + $h2.1              |
+# |     192.0.2.2/24 | |                     | | 198.51.100.2/24    |
+# | 2001:db8:1::2/64 | |                     | | 2001:db8:2::2/64   |
+# |                  | |                     | |                    |
+# |              $h1 + |                     | + $h2                |
+# |                  | |                     | |                    |
+# +------------------|-+                     +-|--------------------+
+#                    |                         |
+# +------------------|-------------------------|--------------------+
+# | SW               |                         |                    |
+# |                  |                         |                    |
+# |             $rp1 +                         + $rp2               |
+# |                  |                         |                    |
+# |           $rp1.1 +                         + $rp2.1             |
+# |     192.0.2.1/24                             198.51.100.1/24    |
+# | 2001:db8:1::1/64                             2001:db8:2::1/64   |
+# |                                                                 |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+	ping_ipv4
+	ping_ipv6
+"
 NUM_NETIFS=4
 source lib.sh
 
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index de19eb6c38f0..1e0a62f638fe 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -60,7 +60,7 @@ mirred_egress_test()
 	RET=0
 
 	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
-		$tcflags dst_ip 192.0.2.2 action drop
+		dst_ip 192.0.2.2 action drop
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
new file mode 100644
index 000000000000..60a1423e8116
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -0,0 +1,235 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
+REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
+
+# Tunables
+UTC_TAI_OFFSET=37
+ISOCHRON_CPU=1
+
+if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then
+	# https://github.com/vladimiroltean/tsn-scripts
+	# WARNING: isochron versions pre-1.0 are unstable,
+	# always use the latest version
+	require_command isochron
+fi
+if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
+	require_command phc2sys
+	require_command ptp4l
+fi
+
+phc2sys_start()
+{
+	local if_name=$1
+	local uds_address=$2
+	local extra_args=""
+
+	if ! [ -z "${uds_address}" ]; then
+		extra_args="${extra_args} -z ${uds_address}"
+	fi
+
+	phc2sys_log="$(mktemp)"
+
+	chrt -f 10 phc2sys -m \
+		-c ${if_name} \
+		-s CLOCK_REALTIME \
+		-O ${UTC_TAI_OFFSET} \
+		--step_threshold 0.00002 \
+		--first_step_threshold 0.00002 \
+		${extra_args} \
+		> "${phc2sys_log}" 2>&1 &
+	phc2sys_pid=$!
+
+	echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}"
+
+	sleep 1
+}
+
+phc2sys_stop()
+{
+	{ kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null
+	rm "${phc2sys_log}" 2> /dev/null
+}
+
+ptp4l_start()
+{
+	local if_name=$1
+	local slave_only=$2
+	local uds_address=$3
+	local log="ptp4l_log_${if_name}"
+	local pid="ptp4l_pid_${if_name}"
+	local extra_args=""
+
+	if [ "${slave_only}" = true ]; then
+		extra_args="${extra_args} -s"
+	fi
+
+	# declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name}
+	# as global, so that they can be referenced later
+	declare -g "${log}=$(mktemp)"
+
+	chrt -f 10 ptp4l -m -2 -P \
+		-i ${if_name} \
+		--step_threshold 0.00002 \
+		--first_step_threshold 0.00002 \
+		--tx_timestamp_timeout 100 \
+		--uds_address="${uds_address}" \
+		${extra_args} \
+		> "${!log}" 2>&1 &
+	declare -g "${pid}=$!"
+
+	echo "ptp4l for interface ${if_name} logs to ${!log} and has pid ${!pid}"
+
+	sleep 1
+}
+
+ptp4l_stop()
+{
+	local if_name=$1
+	local log="ptp4l_log_${if_name}"
+	local pid="ptp4l_pid_${if_name}"
+
+	{ kill ${!pid} && wait ${!pid}; } 2> /dev/null
+	rm "${!log}" 2> /dev/null
+}
+
+cpufreq_max()
+{
+	local cpu=$1
+	local freq="cpu${cpu}_freq"
+	local governor="cpu${cpu}_governor"
+
+	# Kernel may be compiled with CONFIG_CPU_FREQ disabled
+	if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+		return
+	fi
+
+	# declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as
+	# global, so they can be referenced later
+	declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)"
+	declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)"
+
+	cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+	echo -n "performance" > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+cpufreq_restore()
+{
+	local cpu=$1
+	local freq="cpu${cpu}_freq"
+	local governor="cpu${cpu}_governor"
+
+	if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+		return
+	fi
+
+	echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+	echo -n "${!governor}" > \
+		/sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+isochron_recv_start()
+{
+	local if_name=$1	# interface to receive on
+	local uds=$2		# optional --unix-domain-socket value
+	local extra_args=$3	# optional extra 'isochron rcv' arguments
+	# Append the UDS option so the caller's extra arguments are kept
+	if ! [ -z "${uds}" ]; then
+		extra_args="${extra_args} --unix-domain-socket ${uds}"
+	fi
+
+	isochron rcv \
+		--interface ${if_name} \
+		--sched-priority 98 \
+		--sched-fifo \
+		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--quiet \
+		${extra_args} &
+	isochron_pid=$!
+
+	sleep 1
+}
+
+isochron_recv_stop()
+{
+	{ kill ${isochron_pid} && wait ${isochron_pid}; } 2> /dev/null
+}
+
+isochron_do()
+{
+	local sender_if_name=$1; shift
+	local receiver_if_name=$1; shift
+	local sender_uds=$1; shift
+	local receiver_uds=$1; shift
+	local base_time=$1; shift
+	local cycle_time=$1; shift
+	local shift_time=$1; shift
+	local num_pkts=$1; shift
+	local vid=$1; shift
+	local priority=$1; shift
+	local dst_ip=$1; shift
+	local isochron_dat=$1; shift
+	local extra_args=""
+	local receiver_extra_args=""
+	local vrf="$(master_name_get ${sender_if_name})"
+	local use_l2="true"
+	# A destination IP selects --l4 transport; without one, --l2 is used
+	if ! [ -z "${dst_ip}" ]; then
+		use_l2="false"
+	fi
+
+	if ! [ -z "${vrf}" ]; then
+		dst_ip="${dst_ip}%${vrf}"
+	fi
+
+	if ! [ -z "${vid}" ]; then
+		vid="--vid=${vid}"
+	fi
+
+	if [ -z "${receiver_uds}" ]; then
+		extra_args="${extra_args} --omit-remote-sync"
+	fi
+
+	if ! [ -z "${shift_time}" ]; then
+		extra_args="${extra_args} --shift-time=${shift_time}"
+	fi
+
+	if [ "${use_l2}" = "true" ]; then
+		extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
+		receiver_extra_args="--l2 --etype=0xdead"
+	else
+		extra_args="${extra_args} --l4 --ip-destination=${dst_ip}"
+		receiver_extra_args="--l4"
+	fi
+
+	cpufreq_max ${ISOCHRON_CPU}
+	# Start the receiver on the interface the caller passed in
+	isochron_recv_start "${receiver_if_name}" "${receiver_uds}" "${receiver_extra_args}"
+
+	isochron send \
+		--interface ${sender_if_name} \
+		--unix-domain-socket ${sender_uds} \
+		--priority ${priority} \
+		--base-time ${base_time} \
+		--cycle-time ${cycle_time} \
+		--num-frames ${num_pkts} \
+		--frame-size 64 \
+		--txtime \
+		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--cpu-mask $((1 << ${ISOCHRON_CPU})) \
+		--sched-fifo \
+		--sched-priority 98 \
+		--client 127.0.0.1 \
+		--sync-threshold 5000 \
+		--output-file ${isochron_dat} \
+		${extra_args} \
+		--quiet
+
+	isochron_recv_stop
+
+	cpufreq_restore ${ISOCHRON_CPU}
+}
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index d36b7da5082a..38021a0dd527 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -12,6 +12,9 @@ CONFIG_NF_TABLES=m
 CONFIG_NFT_COMPAT=m
 CONFIG_NETFILTER_XTABLES=m
 CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
 CONFIG_NF_TABLES_INET=y
 CONFIG_NFT_TPROXY=m
 CONFIG_NFT_SOCKET=m
@@ -19,3 +22,8 @@ CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MULTIPLE_TABLES=y
 CONFIG_IP_NF_TARGET_REJECT=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_SCH_INGRESS=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index ff821025d309..9dd43d7d957b 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -71,6 +71,43 @@ chk_msk_remote_key_nr()
 		__chk_nr "grep -c remote_key" $*
 }
 
+__chk_listen()
+{
+	local filter="$1"
+	local expected=$2
+
+	shift 2
+	msg=$*
+
+	nr=$(ss -N $ns -Ml "$filter" | grep -c LISTEN)
+	printf "%-50s" "$msg"
+
+	if [ $nr != $expected ]; then
+		echo "[ fail ] expected $expected found $nr"
+		ret=$test_cnt
+	else
+		echo "[  ok  ]"
+	fi
+}
+
+chk_msk_listen()
+{
+	# $1: port of the MPTCP listener checked by the 'ss' filters below
+	lport=$1
+
+	# destination port search should always return empty list
+	__chk_listen "dport $lport" 0 "listen match for dport $lport"
+
+	# should return 'our' mptcp listen socket
+	__chk_listen "sport $lport" 1 "listen match for sport $lport"
+
+	# same socket, selected by source address and port
+	__chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
+
+	# without a filter exactly one listen socket must be reported
+	__chk_listen "" 1 "all listen sockets"
+}
+
 # $1: ns, $2: port
 wait_local_port_listen()
 {
@@ -113,6 +150,7 @@ echo "a" | \
 				0.0.0.0 >/dev/null &
 wait_local_port_listen $ns 10000
 chk_msk_nr 0 "no msk on netns creation"
+chk_msk_listen 10000
 
 echo "b" | \
 	timeout ${timeout_test} \
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 48ef112f42c2..a4406b7a8064 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -70,6 +70,7 @@ init_partial()
 		ip netns add $netns || exit $ksft_skip
 		ip -net $netns link set lo up
 		ip netns exec $netns sysctl -q net.mptcp.enabled=1
+		ip netns exec $netns sysctl -q net.mptcp.pm_type=0
 		ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
 		ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
 		if [ $checksum -eq 1 ]; then
@@ -266,6 +267,58 @@ reset_with_allow_join_id0()
 	ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
 }
 
+# Modify TCP payload without corrupting the TCP packet
+#
+# This rule inverts an 8-bit word at byte offset 148 in the 2nd TCP ACK packet
+# carrying enough data.
+# Once it is done, the TCP Checksum field is updated so the packet is still
+# considered as valid at the TCP level.
+# Because the MPTCP checksum, covering the TCP options and data, has not been
+# updated, the modification will be detected and an MP_FAIL will be emitted:
+# what we want to validate here without corrupting "random" MPTCP options.
+#
+# To avoid having tc produce this pr_info() message for each TCP ACK packet
+# not carrying enough data:
+#
+#     tc action pedit offset 162 out of bounds
+#
+# Netfilter is used to mark packets with enough data.
+reset_with_fail()
+{
+	reset "${1}" || return 1
+
+	ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+	ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
+
+	check_invert=1
+	validate_checksum=1
+	local i="$2"
+	local ip="${3:-4}"
+	local tables
+
+	tables="iptables"
+	if [ $ip -eq 6 ]; then
+		tables="ip6tables"
+	fi
+
+	ip netns exec $ns2 $tables \
+		-t mangle \
+		-A OUTPUT \
+		-o ns2eth$i \
+		-p tcp \
+		-m length --length 150:9999 \
+		-m statistic --mode nth --packet 1 --every 99999 \
+		-j MARK --set-mark 42 || exit 1
+
+	tc -n $ns2 qdisc add dev ns2eth$i clsact || exit 1
+	tc -n $ns2 filter add dev ns2eth$i egress \
+		protocol ip prio 1000 \
+		handle 42 fw \
+		action pedit munge offset 148 u8 invert \
+		pipe csum tcp \
+		index 100 || exit 1
+}
+
 fail_test()
 {
 	ret=1
@@ -961,6 +1014,7 @@ chk_csum_nr()
 	local csum_ns2=${2:-0}
 	local count
 	local dump_stats
+	local extra_msg=""
 	local allow_multi_errors_ns1=0
 	local allow_multi_errors_ns2=0
 
@@ -976,6 +1030,9 @@ chk_csum_nr()
 	printf "%-${nr_blank}s %s" " " "sum"
 	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
+	if [ "$count" != "$csum_ns1" ]; then
+		extra_msg="$extra_msg ns1=$count"
+	fi
 	if { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
 	   { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns1"
@@ -987,28 +1044,58 @@ chk_csum_nr()
 	echo -n " - csum  "
 	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtDataCsumErr | awk '{print $2}')
 	[ -z "$count" ] && count=0
+	if [ "$count" != "$csum_ns2" ]; then
+		extra_msg="$extra_msg ns2=$count"
+	fi
 	if { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
 	   { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
 		echo "[fail] got $count data checksum error[s] expected $csum_ns2"
 		fail_test
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		echo -n "[ ok ]"
 	fi
 	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
 }
 
 chk_fail_nr()
 {
 	local fail_tx=$1
 	local fail_rx=$2
+	local ns_invert=${3:-""}
 	local count
 	local dump_stats
+	local ns_tx=$ns1
+	local ns_rx=$ns2
+	local extra_msg=""
+	local allow_tx_lost=0
+	local allow_rx_lost=0
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns2
+		ns_rx=$ns1
+		extra_msg=" invert"
+	fi
+
+	if [[ "${fail_tx}" = "-"* ]]; then
+		allow_tx_lost=1
+		fail_tx=${fail_tx:1}
+	fi
+	if [[ "${fail_rx}" = "-"* ]]; then
+		allow_rx_lost=1
+		fail_rx=${fail_rx:1}
+	fi
 
 	printf "%-${nr_blank}s %s" " " "ftx"
-	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
+	count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFailTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_tx" ]; then
+		extra_msg="$extra_msg,tx=$count"
+	fi
+	if { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
+	   { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
 		echo "[fail] got $count MP_FAIL[s] TX expected $fail_tx"
 		fail_test
 		dump_stats=1
@@ -1017,17 +1104,23 @@ chk_fail_nr()
 	fi
 
 	echo -n " - failrx"
-	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
+	count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFailRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
 	if [ "$count" != "$fail_rx" ]; then
+		extra_msg="$extra_msg,rx=$count"
+	fi
+	if { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
+	   { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
 		echo "[fail] got $count MP_FAIL[s] RX expected $fail_rx"
 		fail_test
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		echo -n "[ ok ]"
 	fi
 
 	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
 }
 
 chk_fclose_nr()
@@ -1106,6 +1199,38 @@ chk_rst_nr()
 	echo "$extra_msg"
 }
 
+chk_infi_nr()
+{
+	local infi_tx=$1 infi_rx=$2
+	local count dump_stats
+
+	# InfiniteMapTx counter, read in $ns2 and compared with $1
+	printf "%-${nr_blank}s %s" " " "itx"
+	count=$(ip netns exec $ns2 nstat -as | awk '/InfiniteMapTx/ {print $2}')
+	count=${count:-0}
+	if [ "$count" = "$infi_tx" ]; then
+		echo -n "[ ok ]"
+	else
+		echo "[fail] got $count infinite map[s] TX expected $infi_tx"
+		fail_test
+		dump_stats=1
+	fi
+
+	# InfiniteMapRx counter, read in $ns1 and compared with $2
+	echo -n " - infirx"
+	count=$(ip netns exec $ns1 nstat -as | awk '/InfiniteMapRx/ {print $2}')
+	count=${count:-0}
+	if [ "$count" = "$infi_rx" ]; then
+		echo "[ ok ]"
+	else
+		echo "[fail] got $count infinite map[s] RX expected $infi_rx"
+		fail_test
+		dump_stats=1
+	fi
+
+	[ "${dump_stats}" = 1 ] && dump_stats
+}
+
 chk_join_nr()
 {
 	local syn_nr=$1
@@ -1115,7 +1240,8 @@ chk_join_nr()
 	local csum_ns2=${5:-0}
 	local fail_nr=${6:-0}
 	local rst_nr=${7:-0}
-	local corrupted_pkts=${8:-0}
+	local infi_nr=${8:-0}
+	local corrupted_pkts=${9:-0}
 	local count
 	local dump_stats
 	local with_cookie
@@ -1166,10 +1292,11 @@ chk_join_nr()
 		echo "[ ok ]"
 	fi
 	[ "${dump_stats}" = 1 ] && dump_stats
-	if [ $checksum -eq 1 ]; then
+	if [ $validate_checksum -eq 1 ]; then
 		chk_csum_nr $csum_ns1 $csum_ns2
 		chk_fail_nr $fail_nr $fail_nr
 		chk_rst_nr $rst_nr $rst_nr
+		chk_infi_nr $infi_nr $infi_nr
 	fi
 }
 
@@ -1512,6 +1639,13 @@ wait_attempt_fail()
 	return 1
 }
 
+set_userspace_pm()
+{
+	local ns=$1
+
+	ip netns exec $ns sysctl -q net.mptcp.pm_type=1
+}
+
 subflows_tests()
 {
 	if reset "no JOIN"; then
@@ -2583,6 +2717,101 @@ fastclose_tests()
 	fi
 }
 
+pedit_action_pkts()
+{
+	tc -n $ns2 -j -s action show action pedit index 100 | \
+		grep "packets" | \
+		sed 's/.*"packets":\([0-9]\+\),.*/\1/' # keep only the number after "packets":
+}
+
+fail_tests()
+{
+	# single subflow
+	if reset_with_fail "Infinite map" 1; then
+		run_tests $ns1 $ns2 10.0.1.1 128
+		chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+		chk_fail_nr 1 -1 invert
+	fi
+	# chk_join_nr args 5-9 are: csum_ns2 fail_nr rst_nr infi_nr corrupted_pkts
+	# multiple subflows
+	if reset_with_fail "MP_FAIL MP_RST" 2; then
+		tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 1024
+		chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+	fi
+}
+
+userspace_tests()
+{
+	# userspace pm type prevents add_addr
+	if reset "userspace pm type prevents add_addr"; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 0 0
+	fi
+
+	# userspace pm type does not echo add_addr without daemon
+	if reset "userspace pm no echo w/o daemon"; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 2
+		pm_nl_set_limits $ns2 0 2
+		pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+		chk_add_nr 1 0
+	fi
+
+	# userspace pm type rejects join
+	if reset "userspace pm type rejects join"; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 1 1 0
+	fi
+
+	# userspace pm type does not send join
+	if reset "userspace pm type does not send join"; then
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1
+		chk_join_nr 0 0 0
+	fi
+
+	# userspace pm type prevents mp_prio
+	if reset "userspace pm type prevents mp_prio"; then
+		set_userspace_pm $ns1
+		pm_nl_set_limits $ns1 1 1
+		pm_nl_set_limits $ns2 1 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 0 slow backup
+		chk_join_nr 1 1 0
+		chk_prio_nr 0 0
+	fi
+
+	# userspace pm type prevents rm_addr
+	if reset "userspace pm type prevents rm_addr"; then
+		set_userspace_pm $ns1
+		set_userspace_pm $ns2
+		pm_nl_set_limits $ns1 0 1
+		pm_nl_set_limits $ns2 0 1
+		pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+		run_tests $ns1 $ns2 10.0.1.1 0 0 -1 slow
+		chk_join_nr 0 0 0
+		chk_rm_nr 0 0
+	fi
+}
+
 endpoint_tests()
 {
 	# userspace pm type prevents add_addr
@@ -2668,6 +2897,8 @@ all_tests_sorted=(
 	d@deny_join_id0_tests
 	m@fullmesh_tests
 	z@fastclose_tests
+	F@fail_tests
+	u@userspace_tests
 	I@endpoint_tests
 )
 
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index a75a68ad652e..6a2f4b981e1d 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -6,6 +6,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <limits.h>
 
 #include <sys/socket.h>
 #include <sys/types.h>
@@ -21,17 +22,29 @@
 #ifndef MPTCP_PM_NAME
 #define MPTCP_PM_NAME		"mptcp_pm"
 #endif
+#ifndef MPTCP_PM_EVENTS
+#define MPTCP_PM_EVENTS		"mptcp_pm_events"
+#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
 
 static void syntax(char *argv[])
 {
 	fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
 	fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
+	fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
+	fprintf(stderr, "\trem id <local-id> token <token>\n");
+	fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n");
+	fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
 	fprintf(stderr, "\tdel <id> [<ip>]\n");
 	fprintf(stderr, "\tget <id>\n");
 	fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
 	fprintf(stderr, "\tflush\n");
 	fprintf(stderr, "\tdump\n");
 	fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+	fprintf(stderr, "\tevents\n");
+	fprintf(stderr, "\tlisten <local-ip> <local-port>\n");
 	exit(0);
 }
 
@@ -83,6 +96,108 @@ static void nl_error(struct nlmsghdr *nh)
 	}
 }
 
+/* Join the @event_group multicast group on @fd and dump every received event
+ * to stderr as "type:<cmd>,<attr>:<value>,..."; loops until error() exits.
+ */
+static int capture_events(int fd, int event_group)
+{
+	u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+			NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024];
+	struct genlmsghdr *ghdr;
+	struct rtattr *attrs;
+	struct nlmsghdr *nh;
+	int ret = 0;
+	int res_len;
+	int msg_len;
+	fd_set rfds;
+
+	if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+		       &event_group, sizeof(event_group)) < 0)
+		error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+
+	do {
+		FD_ZERO(&rfds);
+		FD_SET(fd, &rfds);
+
+		ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL);
+		if (ret < 0)
+			error(1, errno, "error in select() on NL socket");
+
+		res_len = recv(fd, buffer, sizeof(buffer), 0);
+		if (res_len < 0)
+			error(1, errno, "error on recv() from NL socket");
+
+		nh = (struct nlmsghdr *)buffer;
+
+		for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) {
+			if (nh->nlmsg_type == NLMSG_ERROR)
+				error(1, 0, "received invalid NL message");
+
+			ghdr = (struct genlmsghdr *)NLMSG_DATA(nh);
+
+			if (ghdr->cmd == 0)
+				continue;
+
+			fprintf(stderr, "type:%d", ghdr->cmd);
+
+			msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN);
+
+			attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+			while (RTA_OK(attrs, msg_len)) {
+				if (attrs->rta_type == MPTCP_ATTR_TOKEN)
+					fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_FAMILY)
+					fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_LOC_ID)
+					fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_REM_ID)
+					fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_SADDR4) {
+					u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+					fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24,
+					       (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF,
+					       (saddr4 & 0xFF));
+				} else if (attrs->rta_type == MPTCP_ATTR_SADDR6) {
+					char buf[INET6_ADDRSTRLEN];
+
+					if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+						      sizeof(buf)) != NULL)
+						fprintf(stderr, ",saddr6:%s", buf);
+				} else if (attrs->rta_type == MPTCP_ATTR_DADDR4) {
+					u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+					fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24,
+					       (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF,
+					       (daddr4 & 0xFF));
+				} else if (attrs->rta_type == MPTCP_ATTR_DADDR6) {
+					char buf[INET6_ADDRSTRLEN];
+
+					if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+						      sizeof(buf)) != NULL)
+						fprintf(stderr, ",daddr6:%s", buf);
+				} else if (attrs->rta_type == MPTCP_ATTR_SPORT)
+					fprintf(stderr, ",sport:%u",
+						ntohs(*(__u16 *)RTA_DATA(attrs)));
+				else if (attrs->rta_type == MPTCP_ATTR_DPORT)
+					fprintf(stderr, ",dport:%u",
+						ntohs(*(__u16 *)RTA_DATA(attrs)));
+				else if (attrs->rta_type == MPTCP_ATTR_BACKUP)
+					fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_ERROR)
+					fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
+				else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
+					fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+
+				attrs = RTA_NEXT(attrs, msg_len);
+			}
+		}
+		fprintf(stderr, "\n");
+	} while (1);
+
+	return 0;
+}
+
 /* do a netlink command and, if max > 0, fetch the reply  */
 static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 {
@@ -116,11 +231,18 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
 	return ret;
 }
 
-static int genl_parse_getfamily(struct nlmsghdr *nlh)
+static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
+				int *events_mcast_grp)
 {
 	struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
 	int len = nlh->nlmsg_len;
 	struct rtattr *attrs;
+	struct rtattr *grps;
+	struct rtattr *grp;
+	int got_events_grp;
+	int got_family;
+	int grps_len;
+	int grp_len;
 
 	if (nlh->nlmsg_type != GENL_ID_CTRL)
 		error(1, errno, "Not a controller message, len=%d type=0x%x\n",
@@ -135,9 +257,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
 		error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
 
 	attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+	got_family = 0;
+	got_events_grp = 0;
+
 	while (RTA_OK(attrs, len)) {
-		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
-			return *(__u16 *)RTA_DATA(attrs);
+		if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) {
+			*pm_family = *(__u16 *)RTA_DATA(attrs);
+			got_family = 1;
+		} else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) {
+			grps = RTA_DATA(attrs);
+			grps_len = RTA_PAYLOAD(attrs);
+
+			while (RTA_OK(grps, grps_len)) {
+				grp = RTA_DATA(grps);
+				grp_len = RTA_PAYLOAD(grps);
+				got_events_grp = 0;
+
+				while (RTA_OK(grp, grp_len)) {
+					if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
+						*events_mcast_grp = *(__u32 *)RTA_DATA(grp);
+					else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
+						 !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+						got_events_grp = 1;
+
+					grp = RTA_NEXT(grp, grp_len);
+				}
+
+				if (got_events_grp)
+					break;
+
+				grps = RTA_NEXT(grps, grps_len);
+			}
+		}
+
+		if (got_family && got_events_grp)
+			return 0;
+
 		attrs = RTA_NEXT(attrs, len);
 	}
 
@@ -145,7 +300,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
 	return -1;
 }
 
-static int resolve_mptcp_pm_netlink(int fd)
+static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp)
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
 		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
@@ -167,7 +322,421 @@ static int resolve_mptcp_pm_netlink(int fd)
 	off += NLMSG_ALIGN(rta->rta_len);
 
 	do_nl_req(fd, nh, off, sizeof(data));
-	return genl_parse_getfamily((void *)data);
+	return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp);
+}
+
+/* "dsf lip <ip> lport <port> rip <ip> rport <port> token <token>": send an
+ * MPTCP_PM_CMD_SUBFLOW_DESTROY request for the subflow described by the
+ * local and remote address/port pairs; all five keywords are mandatory.
+ */
+int dsf(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	const char *params[5] = { NULL };
+	struct rtattr *rta, *addr;
+	u_int16_t family, port;
+	struct nlmsghdr *nh;
+	u_int32_t token;
+	int addr_start;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY,
+			    MPTCP_PM_VER);
+
+	if (argc < 12)
+		syntax(argv);
+
+	/* Params recorded in this order:
+	 * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token>
+	 */
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "lip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local IP");
+			params[0] = argv[arg];
+		} else if (!strcmp(argv[arg], "lport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local port");
+			params[1] = argv[arg];
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote IP");
+			params[2] = argv[arg];
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+			params[3] = argv[arg];
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token");
+			params[4] = argv[arg];
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	/* a repeated keyword can satisfy argc while leaving another unset */
+	if (!params[0] || !params[1] || !params[2] || !params[3] || !params[4])
+		error(1, 0, " missing mandatory inputs");
+
+	for (arg = 0; arg < 4; arg = arg + 2) {
+		/*  addr header */
+		addr_start = off;
+		addr = (void *)(data + off);
+		addr->rta_type = NLA_F_NESTED |
+			((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+		addr->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(addr->rta_len);
+
+		/*  addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else
+			error(1, errno, "can't parse ip %s", params[arg]);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/*  port */
+		port = atoi(params[arg + 1]);
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &port, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		addr->rta_len = off - addr_start;
+	}
+
+	/* token: a full u32, so it may exceed what atoi() can return */
+	token = strtoul(params[4], NULL, 10);
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
+/* "csf lip <ip> lid <id> rip <ip> rport <port> token <token>": send an
+ * MPTCP_PM_CMD_SUBFLOW_CREATE request; all five keywords are mandatory.
+ */
+int csf(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	const char *params[5] = { NULL };
+	struct nlmsghdr *nh;
+	struct rtattr *addr;
+	struct rtattr *rta;
+	u_int16_t family;
+	u_int32_t token;
+	u_int16_t port;
+	int addr_start;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE,
+			    MPTCP_PM_VER);
+
+	if (argc < 12)
+		syntax(argv);
+
+	/* Params recorded in this order:
+	 * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token>
+	 */
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "lip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local IP");
+			params[0] = argv[arg];
+		} else if (!strcmp(argv[arg], "lid")) {
+			if (++arg >= argc)
+				error(1, 0, " missing local id");
+			params[1] = argv[arg];
+		} else if (!strcmp(argv[arg], "rip")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote ip");
+			params[2] = argv[arg];
+		} else if (!strcmp(argv[arg], "rport")) {
+			if (++arg >= argc)
+				error(1, 0, " missing remote port");
+			params[3] = argv[arg];
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token");
+			params[4] = argv[arg];
+		} else
+			error(1, 0, "unknown param %s", argv[arg]);
+	}
+
+	/* a repeated keyword can satisfy argc while leaving another unset */
+	if (!params[0] || !params[1] || !params[2] || !params[3] || !params[4])
+		error(1, 0, " missing mandatory inputs");
+
+	for (arg = 0; arg < 4; arg = arg + 2) {
+		/*  addr header */
+		addr_start = off;
+		addr = (void *)(data + off);
+		addr->rta_type = NLA_F_NESTED |
+			((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+		addr->rta_len = RTA_LENGTH(0);
+		off += NLMSG_ALIGN(addr->rta_len);
+
+		/*  addr data */
+		rta = (void *)(data + off);
+		if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+			family = AF_INET;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+			rta->rta_len = RTA_LENGTH(4);
+		} else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+			family = AF_INET6;
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+			rta->rta_len = RTA_LENGTH(16);
+		} else
+			error(1, errno, "can't parse ip %s", params[arg]);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		/* family */
+		rta = (void *)(data + off);
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+		rta->rta_len = RTA_LENGTH(2);
+		memcpy(RTA_DATA(rta), &family, 2);
+		off += NLMSG_ALIGN(rta->rta_len);
+
+		if (arg == 2) {
+			/*  port */
+			port = atoi(params[arg + 1]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		if (arg == 0) {
+			/* id */
+			id = atoi(params[arg + 1]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		}
+
+		addr->rta_len = off - addr_start;
+	}
+
+	/* token: a full u32, so it may exceed what atoi() can return */
+	token = strtoul(params[4], NULL, 10);
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
+}
+
+/* "rem id <local-id> token <token>": send an MPTCP_PM_CMD_REMOVE request. */
+int remove_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	u_int32_t token;
+	u_int8_t id;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE,
+			    MPTCP_PM_VER);
+
+	if (argc < 6)
+		syntax(argv);
+
+	for (arg = 2; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "id")) {
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ATTR_LOC_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			memcpy(RTA_DATA(rta), &id, 1);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "token")) {
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+			/* a token may exceed INT_MAX, so no atoi() */
+			token = strtoul(argv[arg], NULL, 10);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &token, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	do_nl_req(fd, nh, off, 0);
+	return 0;
+}
+
+/* "ann <local-ip> id <id> token <token> [port <port>] [dev <name>]": send an
+ * MPTCP_PM_CMD_ANNOUNCE request for <local-ip>; id and token are mandatory.
+ */
+int announce_addr(int fd, int pm_family, int argc, char *argv[])
+{
+	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+		  NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+		  1024];
+	u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL;
+	u_int32_t token = UINT_MAX;
+	struct rtattr *rta, *addr;
+	u_int32_t id = UINT_MAX;
+	struct nlmsghdr *nh;
+	u_int16_t family;
+	int addr_start;
+	int off = 0;
+	int arg;
+
+	memset(data, 0, sizeof(data));
+	nh = (void *)data;
+	off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE,
+			    MPTCP_PM_VER);
+
+	if (argc < 7)
+		syntax(argv);
+
+	/* local-ip header */
+	addr_start = off;
+	addr = (void *)(data + off);
+	addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+	addr->rta_len = RTA_LENGTH(0);
+	off += NLMSG_ALIGN(addr->rta_len);
+
+	/* local-ip data, record addr type */
+	rta = (void *)(data + off);
+	if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+		family = AF_INET;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+		rta->rta_len = RTA_LENGTH(4);
+	} else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+		family = AF_INET6;
+		rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+		rta->rta_len = RTA_LENGTH(16);
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* addr family */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+	rta->rta_len = RTA_LENGTH(2);
+	memcpy(RTA_DATA(rta), &family, 2);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	for (arg = 3; arg < argc; arg++) {
+		if (!strcmp(argv[arg], "id")) {
+			/* local-id */
+			if (++arg >= argc)
+				error(1, 0, " missing id value");
+			id = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+			rta->rta_len = RTA_LENGTH(1);
+			/* store the low byte whatever the host endianness */
+			*(u_int8_t *)RTA_DATA(rta) = id;
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "dev")) {
+			/* for the if_index */
+			int32_t ifindex;
+
+			if (++arg >= argc)
+				error(1, 0, " missing dev name");
+			ifindex = if_nametoindex(argv[arg]);
+			if (!ifindex)
+				error(1, errno, "unknown device %s", argv[arg]);
+
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+			rta->rta_len = RTA_LENGTH(4);
+			memcpy(RTA_DATA(rta), &ifindex, 4);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "port")) {
+			/* local-port (optional) */
+			u_int16_t port;
+
+			if (++arg >= argc)
+				error(1, 0, " missing port value");
+			port = atoi(argv[arg]);
+			rta = (void *)(data + off);
+			rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+			rta->rta_len = RTA_LENGTH(2);
+			memcpy(RTA_DATA(rta), &port, 2);
+			off += NLMSG_ALIGN(rta->rta_len);
+		} else if (!strcmp(argv[arg], "token")) {
+			/* MPTCP connection token */
+			if (++arg >= argc)
+				error(1, 0, " missing token value");
+			/* a token may exceed INT_MAX, so no atoi() */
+			token = strtoul(argv[arg], NULL, 10);
+		} else
+			error(1, 0, "unknown keyword %s", argv[arg]);
+	}
+
+	/* addr flags */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &flags, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	addr->rta_len = off - addr_start;
+
+	if (id == UINT_MAX || token == UINT_MAX)
+		error(1, 0, " missing mandatory inputs");
+
+	/* token */
+	rta = (void *)(data + off);
+	rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+	rta->rta_len = RTA_LENGTH(4);
+	memcpy(RTA_DATA(rta), &token, 4);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	do_nl_req(fd, nh, off, 0);
+
+	return 0;
 }
 
 int add_addr(int fd, int pm_family, int argc, char *argv[])
@@ -654,6 +1223,54 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[])
 	return 0;
 }
 
+/* "listen <local-ip> <local-port>": bind an MPTCP socket to the given address,
+ * listen on it and block in pause(); any setup failure exits via error().
+ */
+int add_listener(int argc, char *argv[])
+{
+	struct sockaddr_storage addr;
+	struct sockaddr_in6 *a6;
+	struct sockaddr_in *a4;
+	u_int16_t family;
+	int enable = 1;
+	int sock, err;
+
+	if (argc < 4)
+		syntax(argv);
+
+	memset(&addr, 0, sizeof(struct sockaddr_storage));
+	a4 = (struct sockaddr_in *)&addr;
+	a6 = (struct sockaddr_in6 *)&addr;
+
+	if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) {
+		family = AF_INET;
+		a4->sin_family = family;
+		a4->sin_port = htons(atoi(argv[3]));
+	} else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) {
+		family = AF_INET6;
+		a6->sin6_family = family;
+		a6->sin6_port = htons(atoi(argv[3]));
+	} else
+		error(1, errno, "can't parse ip %s", argv[2]);
+
+	sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP);
+	if (sock < 0)
+		error(1, errno, "can't create listener sock");
+
+	if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable)))
+		error(1, errno, "can't set SO_REUSEADDR on listener sock");
+
+	err = bind(sock, (struct sockaddr *)&addr,
+		   ((family == AF_INET) ? sizeof(struct sockaddr_in) :
+		    sizeof(struct sockaddr_in6)));
+	if (err || listen(sock, 30))
+		error(1, errno, "can't bind or listen on listener sock");
+
+	pause();
+	close(sock);
+	return 0;
+}
+
 int set_flags(int fd, int pm_family, int argc, char *argv[])
 {
 	char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
@@ -773,7 +1390,9 @@ int set_flags(int fd, int pm_family, int argc, char *argv[])
 
 int main(int argc, char *argv[])
 {
-	int fd, pm_family;
+	int events_mcast_grp;
+	int pm_family;
+	int fd;
 
 	if (argc < 2)
 		syntax(argv);
@@ -782,10 +1401,18 @@ int main(int argc, char *argv[])
 	if (fd == -1)
 		error(1, errno, "socket netlink");
 
-	pm_family = resolve_mptcp_pm_netlink(fd);
+	resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp);
 
 	if (!strcmp(argv[1], "add"))
 		return add_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "ann"))
+		return announce_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "rem"))
+		return remove_addr(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "csf"))
+		return csf(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "dsf"))
+		return dsf(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "del"))
 		return del_addr(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "flush"))
@@ -798,6 +1425,10 @@ int main(int argc, char *argv[])
 		return get_set_limits(fd, pm_family, argc, argv);
 	else if (!strcmp(argv[1], "set"))
 		return set_flags(fd, pm_family, argc, argv);
+	else if (!strcmp(argv[1], "events"))
+		return capture_events(fd, events_mcast_grp);
+	else if (!strcmp(argv[1], "listen"))
+		return add_listener(argc, argv);
 
 	fprintf(stderr, "unknown sub-command: %s", argv[1]);
 	syntax(argv);
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
new file mode 100755
index 000000000000..78d0bb640b11
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -0,0 +1,779 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# The test needs iproute2; without it, report a kselftest SKIP, not a failure.
+if ! ip -Version > /dev/null 2>&1; then
+	echo "SKIP: Cannot run test without ip tool"
+	exit 4 # kselftest skip code (KSFT_SKIP)
+fi
+
+ANNOUNCED=6        # MPTCP_EVENT_ANNOUNCED
+REMOVED=7          # MPTCP_EVENT_REMOVED
+SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+SUB_CLOSED=11      # MPTCP_EVENT_SUB_CLOSED
+
+AF_INET=2
+AF_INET6=10
+
+evts_pid=0
+client4_pid=0
+server4_pid=0
+client6_pid=0
+server6_pid=0
+client4_token=""
+server4_token=""
+client6_token=""
+server6_token=""
+client4_port=0;
+client6_port=0;
+app4_port=50002
+new4_port=50003
+app6_port=50004
+client_addr_id=${RANDOM:0:2}
+server_addr_id=${RANDOM:0:2}
+
+sec=$(date +%s)
+rndh=$(stdbuf -o0 -e0 printf %x "$sec")-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+
+cleanup()
+{
+	echo "cleanup"
+	# NOTE(review): $file is a local of make_connection; confirm it is ever set here
+	rm -rf $file
+
+	# Terminate the MPTCP connection and related processes
+	if [ $client4_pid -ne 0 ]; then
+		kill -SIGUSR1 $client4_pid > /dev/null 2>&1
+	fi
+	if [ $server4_pid -ne 0 ]; then
+		kill $server4_pid > /dev/null 2>&1
+	fi
+	if [ $client6_pid -ne 0 ]; then
+		kill -SIGUSR1 $client6_pid > /dev/null 2>&1
+	fi
+	if [ $server6_pid -ne 0 ]; then
+		kill $server6_pid > /dev/null 2>&1
+	fi
+	if [ $evts_pid -ne 0 ]; then
+		kill $evts_pid > /dev/null 2>&1
+	fi
+	local netns
+	for netns in "$ns1" "$ns2" ;do
+		ip netns del "$netns"
+	done
+}
+
+trap cleanup EXIT
+
+# Create and configure network namespaces for testing
+for i in "$ns1" "$ns2" ;do
+	ip netns add "$i" || exit 1
+	ip -net "$i" link set lo up
+	ip netns exec "$i" sysctl -q net.mptcp.enabled=1
+	ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
+done
+
+#  "$ns1"              ns2
+#     ns1eth2    ns2eth1
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+
+# Add IPv4/v6 addresses to the namespaces
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ip -net "$ns1" link set ns1eth2 up
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+stdbuf -o0 -e0 printf "Created network namespaces ns1, ns2         \t\t\t[OK]\n"
+
+make_file()
+{
+	# Store a chunk of data in a file to transmit over an MPTCP connection
+	local name=$1
+	local ksize=1
+	# bs=2 with count=$ksize writes 2 random bytes; arguments after $1 are ignored
+	dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null
+	echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+}
+
+make_connection()
+{
+	local file
+	file=$(mktemp)
+	make_file "$file" "client"
+	# $1: "v6" selects the IPv6 address/port set; any other value means IPv4
+	local is_v6=$1
+	local app_port=$app4_port
+	local connect_addr="10.0.1.1"
+	local listen_addr="0.0.0.0"
+	if [ "$is_v6" = "v6" ]
+	then
+		connect_addr="dead:beef:1::1"
+		listen_addr="::"
+		app_port=$app6_port
+	else
+		is_v6="v4"
+	fi
+
+	# Capture netlink events over the two network namespaces running
+	# the MPTCP client and server
+	local client_evts
+	client_evts=$(mktemp)
+	:>"$client_evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
+	local client_evts_pid=$!
+	local server_evts
+	server_evts=$(mktemp)
+	:>"$server_evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
+	local server_evts_pid=$!
+	sleep 0.5
+
+	# Run the server
+	ip netns exec "$ns1" \
+	   ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
+	local server_pid=$!
+	sleep 0.5
+	# NOTE(review): "2>&1 > /dev/null" below keeps stderr on this script's stdout - confirm intended
+	# Run the client, transfer $file and stay connected to the server
+	# to conduct tests
+	ip netns exec "$ns2" \
+	   ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\
+	   2>&1 > /dev/null < "$file" &
+	local client_pid=$!
+	sleep 1
+
+	# Capture client/server attributes from MPTCP connection netlink events
+	kill $client_evts_pid
+	# Each sed below ends with "q", so only the first line of a capture file is parsed
+	local client_token
+	local client_port
+	local client_serverside
+	local server_token
+	local server_serverside
+
+	client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+	client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
+				      "$client_evts")
+	kill $server_evts_pid
+	server_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+	server_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
+				      "$server_evts")
+	rm -f "$client_evts" "$server_evts" "$file"
+
+	if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
+		   [ "$server_serverside" = 1 ]
+	then
+		stdbuf -o0 -e0 printf "Established IP%s MPTCP Connection ns2 => ns1    \t\t[OK]\n" $is_v6
+	else
+		exit 1
+	fi
+
+	if [ "$is_v6" = "v6" ]
+	then
+		client6_token=$client_token
+		server6_token=$server_token
+		client6_port=$client_port
+		client6_pid=$client_pid
+		server6_pid=$server_pid
+	else
+		client4_token=$client_token
+		server4_token=$server_token
+		client4_port=$client_port
+		client4_pid=$client_pid
+		server4_pid=$server_pid
+	fi
+}
+
+verify_announce_event()
+{
+	local evt=$1
+	local e_type=$2
+	local e_token=$3
+	local e_addr=$4
+	local e_id=$5
+	local e_dport=$6
+	local e_af=$7
+	local type
+	local token
+	local addr
+	local dport
+	local id
+	# Parse the first line of event file $1; compare with the expected $2..$6 ($7 "v6" reads daddr6)
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$e_af" = "v6" ]
+	then
+		addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+	else
+		addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+	fi
+	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$type" = "$e_type" ] && [ "$token" = "$e_token" ] &&
+		   [ "$addr" = "$e_addr" ] && [ "$dport" = "$e_dport" ] &&
+		   [ "$id" = "$e_id" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+		return 0
+	fi
+	stdbuf -o0 -e0 printf "[FAIL]\n"
+	exit 1
+}
+
+test_announce()
+{
+	local evts
+	evts=$(mktemp)
+	# Record pm_nl_ctl events seen in the server namespace (ns1) into $evts
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!	# not declared local: PID of the background events listener
+	sleep 0.5
+
+	# ADD_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1))
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\
+	   $client_addr_id dev ns2eth1 > /dev/null 2>&1
+
+	local type
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	stdbuf -o0 -e0 printf "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token    \t\t"
+	if [ "$type" = "" ]	# no "type:" field on the first line => no event recorded
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		exit 1
+	fi
+
+	# ADD_ADDR from the client to server machine reusing the subflow port
+	:>"$evts"	# truncate so that only the next event is inspected
+	ip netns exec "$ns2"\
+	   ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
+	   ns2eth1 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, reuse port \t\t" $client_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2" "$client_addr_id"\
+			      "$client4_port"
+
+	# ADD_ADDR6 from the client to server machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann\
+	   dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::2 (ns2) => ns1, reuse port\t\t" $client_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
+			      "$client_addr_id" "$client6_port" "v6"
+
+	# ADD_ADDR from the client to server machine using a new port
+	:>"$evts"
+	client_addr_id=$((client_addr_id+1))	# the new announcement gets the next address id
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id dev ns2eth1 port $new4_port > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.2 (ns2) => ns1, new port \t\t\t" $client_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
+			      "$client_addr_id" "$new4_port"
+
+	kill $evts_pid	# stop the ns1 listener before starting one in ns2
+
+	# Record pm_nl_ctl events seen in the client namespace (ns2) into $evts
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# ADD_ADDR from the server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, reuse port \t\t" $server_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$app4_port"
+
+	# ADD_ADDR6 from the server to client machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id dev ns1eth2 > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR6 id:%d dead:beef:2::1 (ns1) => ns2, reuse port\t\t" $server_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
+			      "$server_addr_id" "$app6_port" "v6"
+
+	# ADD_ADDR from the server to client machine using a new port
+	:>"$evts"
+	server_addr_id=$((server_addr_id+1))
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id dev ns1eth2 port $new4_port > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "ADD_ADDR id:%d 10.0.2.1 (ns1) => ns2, new port \t\t\t" $server_addr_id
+	sleep 0.5
+	verify_announce_event "$evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+			      "$server_addr_id" "$new4_port"
+
+	kill $evts_pid
+	rm -f "$evts"	# only reached when every check above passed
+}
+
+verify_remove_event()
+{
+	# Compare the first line of events file $1 with the expected event type
+	# ($2), connection token ($3) and removed address id ($4).
+	# Prints [OK] and returns 0 on a match; otherwise prints [FAIL] and
+	# exits with status 1.
+	local evt=$1 e_type=$2 e_token=$3 e_id=$4
+	local got_type got_token got_id
+
+	got_type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	got_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	got_id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+
+	if [ "$got_type" != "$e_type" ] || [ "$got_token" != "$e_token" ] ||
+		   [ "$got_id" != "$e_id" ]
+	then
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		exit 1
+	fi
+	stdbuf -o0 -e0 printf "[OK]\n"
+	return 0
+}
+
+test_remove()
+{
+	local evts
+	evts=$(mktemp)
+	# Exercise RM_ADDR in both directions; a failed check aborts the script.
+	# Capture events on the network namespace running the server
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# RM_ADDR using an invalid token should result in no action
+	local invalid_token=$(( client4_token - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid token                    \t" $client_addr_id
+	local type
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	if [ "$type" = "" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		exit 1
+	fi
+
+	# RM_ADDR using an invalid addr id should result in no action
+	local invalid_id=$(( client_addr_id + 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $invalid_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1, invalid id                    \t" $invalid_id
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+	if [ "$type" = "" ]
+	then
+		stdbuf -o0 -e0 printf "[OK]\n"
+	else
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		exit 1
+	fi
+
+	# RM_ADDR from the client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1                                \t"\
+	       $client_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR from the client to server machine, previous address id
+	:>"$evts"
+	client_addr_id=$(( client_addr_id - 1 ))
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns2 => ns1                                \t"\
+	       $client_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+	# RM_ADDR6 from the client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns2 => ns1                               \t"\
+	       $client_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$server6_token" "$client_addr_id"
+
+	kill $evts_pid
+
+	# Capture events on the network namespace running the client
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# RM_ADDR from the server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2                                \t"\
+	       $server_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR from the server to client machine, previous address id
+	:>"$evts"
+	server_addr_id=$(( server_addr_id - 1 ))
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR id:%d ns1 => ns2                                \t" $server_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+	# RM_ADDR6 from the server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	stdbuf -o0 -e0 printf "RM_ADDR6 id:%d ns1 => ns2                               \t" $server_addr_id
+	sleep 0.5
+	verify_remove_event "$evts" "$REMOVED" "$client6_token" "$server_addr_id"
+
+	kill $evts_pid
+	rm -f "$evts"
+}
+
+verify_subflow_events()
+{
+	# Announce the expected subflow event on stdout, then check it against
+	# the first line of the events file.
+	#
+	#   $1 events file       $2 expected event type   $3 expected token
+	#   $4 expected family   $5 expected source addr  $6 expected dest addr
+	#   $7 expected dport    $8 expected loc_id       $9 expected rem_id
+	#   ${10}, ${11}         labels of the source and destination namespaces
+	#
+	# Prints [OK] and returns 0 when every field matches; otherwise prints
+	# [FAIL] and exits with status 1.
+
+	local evt=$1 e_type=$2 e_token=$3 e_family=$4
+	local e_saddr=$5 e_daddr=$6 e_dport=$7
+	local e_locid=$8 e_remid=$9
+	local e_from=${10} e_to=${11}
+	local type token family
+	local saddr daddr dport locid remid
+	local fmt4 fmt6 fmt
+
+	# Keep the exact column padding of each banner: the IPv4 variants end in
+	# a tab, the IPv6 ones in plain spaces.
+	if [ "$e_type" = "$SUB_ESTABLISHED" ]
+	then
+		fmt6="CREATE_SUBFLOW6 %s (%s) => %s (%s)    "
+		fmt4="CREATE_SUBFLOW %s (%s) => %s (%s)         \t"
+	else
+		fmt6="DESTROY_SUBFLOW6 %s (%s) => %s (%s)   "
+		fmt4="DESTROY_SUBFLOW %s (%s) => %s (%s)         \t"
+	fi
+	if [ "$e_family" = "$AF_INET6" ]
+	then
+		fmt=$fmt6
+	else
+		fmt=$fmt4
+	fi
+
+	stdbuf -o0 -e0 printf "$fmt" "$e_saddr" "$e_from" "$e_daddr" "$e_to"
+
+	# Family first: it selects which address fields are read from the event.
+	family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	if [ "$family" = "$AF_INET6" ]
+	then
+		saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+		daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+	else
+		saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+		daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+	fi
+
+	# Remaining numeric fields of the event.
+	type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+	remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+
+	# Any single mismatch fails the check.
+	if [ "$type" != "$e_type" ] || [ "$token" != "$e_token" ] ||
+		   [ "$daddr" != "$e_daddr" ] || [ "$e_dport" != "$dport" ] ||
+		   [ "$family" != "$e_family" ] || [ "$saddr" != "$e_saddr" ] ||
+		   [ "$e_locid" != "$locid" ] || [ "$e_remid" != "$remid" ]
+	then
+		stdbuf -o0 -e0 printf "[FAIL]\n"
+		exit 1
+	fi
+	stdbuf -o0 -e0 printf "[OK]\n"
+	return 0
+}
+
+test_subflows()
+{
+	local evts
+	evts=$(mktemp)
+	# Record pm_nl_ctl events seen in the server namespace (ns1) into $evts
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!	# not declared local: PID of the background events listener
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   "$client4_port" > /dev/null 2>&1 &
+	local listener_pid=$!	# background listener, killed once the subflow is verified
+
+	# ADD_ADDR from client to server machine reusing the subflow port
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$evts"	# truncate so that only the subflow event is inspected
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+	   rport "$client4_port" token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	local sport	# source port of the new subflow, passed as lport to dsf below
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   "$client4_port" token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::2:<subflow-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\
+	   "$client6_port" > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR6 from client to server machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\
+	   $client_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW6 from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\
+	   dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
+			      "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW6 from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\
+	   dead:beef:2::2 rport "$client6_port" token "$server6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
+			      "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client6_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.2:<new-port>
+	ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+	   $new4_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR from client to server machine using a new port
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+	   $client_addr_id port $new4_port > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\
+	   $new4_port token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
+			      "10.0.2.1" "10.0.2.2" "$new4_port" "23"\
+			      "$client_addr_id" "ns1" "ns2"
+
+	# Delete the listener from the client ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from server to client machine
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+	   $new4_port token "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+			      "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+	# RM_ADDR from client to server machine
+	ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+	   "$client4_token" > /dev/null 2>&1
+
+	kill $evts_pid	# stop the ns1 listener before starting one in ns2
+
+	# Record pm_nl_ctl events seen in the client namespace (ns2) into $evts
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl events >> "$evts" 2>&1 &
+	evts_pid=$!
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $app4_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR from server to client machine reusing the subflow port
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $app4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	kill $listener_pid> /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $app4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server4_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at dead:beef:2::1:<subflow-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\
+	   $app6_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR6 from server to client machine reusing the subflow port
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+	   $server_addr_id > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW6 from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\
+	   dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client6_token"\
+			      "$AF_INET6" "dead:beef:2::2"\
+			      "dead:beef:2::1" "$app6_port" "23"\
+			      "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW6 from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\
+	   dead:beef:2::1 rport $app6_port token "$client6_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$client6_token" "$AF_INET6" "dead:beef:2::2"\
+			      "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR6 from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server6_token" > /dev/null 2>&1
+	sleep 0.5
+
+	# Attempt to add a listener at 10.0.2.1:<new-port>
+	ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+	   $new4_port > /dev/null 2>&1 &
+	listener_pid=$!
+
+	# ADD_ADDR from server to client machine using a new port
+	:>"$evts"
+	ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+	   $server_addr_id port $new4_port > /dev/null 2>&1
+	sleep 0.5
+
+	# CREATE_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+	   $new4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
+			      "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# Delete the listener from the server ns, if one was created
+	kill $listener_pid > /dev/null 2>&1
+
+	sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts")
+
+	# DESTROY_SUBFLOW from client to server machine
+	:>"$evts"
+	ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+	   $new4_port token "$client4_token" > /dev/null 2>&1
+	sleep 0.5
+	verify_subflow_events "$evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+			      "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+	# RM_ADDR from server to client machine
+	ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+	   "$server4_token" > /dev/null 2>&1
+
+	kill $evts_pid
+	rm -f "$evts"	# only reached when every check above passed
+}
+
+make_connection	# no argument: presumably the IPv4 connection - confirm in make_connection
+make_connection "v6"
+test_announce
+test_remove
+test_subflows
+
+exit 0	# reached only if no check above called exit 1
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
new file mode 100755
index 000000000000..f508657ee126
--- /dev/null
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -0,0 +1,255 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for the accept_unsolicited_na feature to
+# enable RFC9131 behaviour. The following is the test-matrix.
+# drop   accept  fwding                   behaviour
+# ----   ------  ------  ----------------------------------------------
+#    1        X       X  Drop NA packet and don't pass up the stack
+#    0        0       X  Pass NA packet up the stack, don't update NC
+#    0        1       0  Pass NA packet up the stack, don't update NC
+#    0        1       1  Pass NA packet up the stack, and add a STALE
+#                           NC entry
+
+ret=0	# overall exit status; log_test sets it to 1 on a failed test
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no	# set to yes by -p
+PAUSE=no	# set to yes by -P
+
+HOST_NS="ns-host"
+ROUTER_NS="ns-router"
+
+HOST_INTF="veth-host"
+ROUTER_INTF="veth-router"
+
+ROUTER_ADDR="2000:20::1"
+HOST_ADDR="2000:20::2"
+SUBNET_WIDTH=64
+ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}"
+HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}"
+
+IP_HOST="ip -6 -netns ${HOST_NS}"	# ip(8) commands inside the host namespace
+IP_HOST_EXEC="ip netns exec ${HOST_NS}"	# arbitrary commands inside the host namespace
+IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
+IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
+
+tcpdump_stdout=	# temp file paths; set by start_tcpdump, cleared by cleanup_tcpdump
+tcpdump_stderr=
+
+log_test()
+{
+	# Record one result: $1 is the observed status, $2 the expected status
+	# and $3 the label printed next to the verdict.
+	local got=$1 want=$2 label="$3"
+
+	if ! [ ${got} -eq ${want} ]; then
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${label}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	else
+		printf "    TEST: %-60s  [ OK ]\n" "${label}"
+		nsuccess=$((nsuccess+1))
+	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
+}
+
+setup()
+{
+	set -e	# errexit: any failing command below aborts the script
+
+	local drop_unsolicited_na=$1
+	local accept_unsolicited_na=$2
+	local forwarding=$3
+
+	# Setup two namespaces and a veth tunnel across them.
+	# One end of the tunnel is a router and the other end is a host.
+	ip netns add ${HOST_NS}
+	ip netns add ${ROUTER_NS}
+	${IP_ROUTER} link add ${ROUTER_INTF} type veth \
+                peer name ${HOST_INTF} netns ${HOST_NS}
+
+	# Enable IPv6 on both router and host, and configure static addresses.
+	# The router here is the DUT (device under test).
+	# Setup router configuration as specified by the arguments.
+	# forwarding=0 case is to check that a non-router
+	# doesn't add neighbour entries.
+        ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.forwarding=${forwarding}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na}
+	${IP_ROUTER_EXEC} sysctl -qw \
+                ${ROUTER_CONF}.accept_unsolicited_na=${accept_unsolicited_na}
+	${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0
+	${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF}
+
+	# Turn on ndisc_notify on host interface so that
+	# the host sends unsolicited NAs.
+	HOST_CONF=net.ipv6.conf.${HOST_INTF}
+	${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+	${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+	${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF}
+
+	set +e	# back to non-fatal errors; the links are still down at this point
+}
+
+start_tcpdump() {
+	set -e	# errexit: any failing command below aborts the script
+	tcpdump_stdout=`mktemp`
+	tcpdump_stderr=`mktemp`	# NOTE(review): created but not written here; stderr goes to /dev/null
+	${IP_ROUTER_EXEC} timeout 15s \
+                tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+                "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \
+                > ${tcpdump_stdout} 2> /dev/null	# foreground: returns after 1 matching packet or the 15s timeout
+	set +e
+}
+
+cleanup_tcpdump()
+{
+	# Delete the capture temp files, if any were created, and forget them.
+	set -e
+	if [[ -n ${tcpdump_stdout} ]]; then rm -f ${tcpdump_stdout}; fi
+	if [[ -n ${tcpdump_stderr} ]]; then rm -f ${tcpdump_stderr}; fi
+	tcpdump_stdout= tcpdump_stderr=
+	set +e
+}
+
+cleanup()
+{
+	cleanup_tcpdump	# remove the capture temp files first
+	ip netns del ${HOST_NS}	# errors are not checked: the namespaces may not exist yet
+	ip netns del ${ROUTER_NS}
+}
+
+link_up() {
+	set -e	# errexit: failing to bring either end up aborts the script
+	${IP_ROUTER} link set dev ${ROUTER_INTF} up
+	${IP_HOST} link set dev ${HOST_INTF} up
+	set +e
+}
+
+verify_ndisc() {
+	local drop_unsolicited_na=$1
+	local accept_unsolicited_na=$2
+	local forwarding=$3
+
+	neigh_show_output=$(${IP_ROUTER} neigh show \
+                to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale)
+	if [ ${drop_unsolicited_na} -eq 0 ] && \
+			[ ${accept_unsolicited_na} -eq 1 ] && \
+			[ ${forwarding} -eq 1 ]; then
+		# Neighbour entry expected to be present for 011 case
+		[[ ${neigh_show_output} ]]
+	else
+		# Neighbour entry expected to be absent for all other cases
+		[[ -z ${neigh_show_output} ]]
+	fi
+}
+
+test_unsolicited_na_common()
+{
+	# Setup the test bed, but keep links down
+	setup $1 $2 $3
+
+	# Bring the links up, then block in start_tcpdump until the host's
+	# NA is captured on the router interface (or the capture times out).
+	link_up
+	start_tcpdump
+
+	# Verify the neighbour table; its status is this function's return value
+	verify_ndisc $1 $2 $3
+
+}
+
+test_unsolicited_na_combination() {
+	# Run one combination ($1=drop $2=accept $3=forwarding) and log it.
+	# The label is built first so $? below is the test's own status.
+	test_msg=("test_unsolicited_na: " "drop_unsolicited_na=$1 "
+		"accept_unsolicited_na=$2 " "forwarding=$3")
+	test_unsolicited_na_common $1 $2 $3
+	log_test $? 0 "${test_msg[*]}"
+	cleanup
+}
+
+test_unsolicited_na_combinations() {
+	# Run the full 2x2x2 matrix. Each triple is:
+	#   drop_unsolicited_na accept_unsolicited_na forwarding
+	# "0 1 1" comes first and is the only case where a neighbour entry is
+	# expected; all the other combinations expect no entry.
+	local combo
+
+	for combo in "0 1 1" \
+		     "0 0 0" "0 0 1" "0 1 0" \
+		     "1 0 0" "1 0 1" "1 1 0" "1 1 1"
+	do
+		# Left unquoted on purpose: the triple must split into 3 args
+		test_unsolicited_na_combination ${combo}
+	done
+}
+
+###############################################################################
+# usage: print the command-line help on stdout
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+EOF
+}
+
+###############################################################################
+# main: parse options, check prerequisites, run the test matrix
+
+while getopts :pPh o
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+# make sure we don't pause twice: -P takes precedence over -p
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+	echo "SKIP: Could not run test without tcpdump tool"
+	exit $ksft_skip
+fi
+
+# start clean: remove leftovers of a previous run, ignoring any error output
+cleanup &> /dev/null
+
+test_unsolicited_na_combinations
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret	# 0 unless log_test recorded a failure
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.c b/tools/testing/selftests/net/stress_reuseport_listen.c
new file mode 100644
index 000000000000..ef800bb35a8e
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+/* Test listening on the same port 443 with multiple VIPs.
+ * Each VIP:443 will have multiple sockets listening on it,
+ * using SO_REUSEPORT.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <error.h>
+#include <errno.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#define IP6_LADDR_START "2401:dead::1"
+#define IP6_LPORT 443
+#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_USEC 1000L
+
+static unsigned int nr_socks_per_vip;
+static unsigned int nr_vips;
+
+/* Bind nr_socks_per_vip SO_REUSEPORT sockets to port IP6_LPORT on each of
+ * nr_vips addresses, starting at IP6_LADDR_START. Returns the malloc()ed
+ * array of fds (caller frees); any failure exits through error().
+ */
+static int *bind_reuseport_sock6(void)
+{
+	int *lfds, *cur_fd, err, optvalue = 1;
+	struct sockaddr_in6 sa6 = {};
+	unsigned int i, j;
+
+	sa6.sin6_family = AF_INET6;
+	sa6.sin6_port = htons(IP6_LPORT);
+	err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr);
+	if (err != 1)	/* 0: not a valid address, -1: errno is set */
+		error(1, err < 0 ? errno : 0, "inet_pton(%s)", IP6_LADDR_START);
+	lfds = malloc(nr_vips * nr_socks_per_vip * sizeof(lfds[0]));
+	if (!lfds)
+		error(1, errno, "cannot alloc array of lfds");
+
+	cur_fd = lfds;
+	for (i = 0; i < nr_vips; i++) {
+		for (j = 0; j < nr_socks_per_vip; j++) {
+			*cur_fd = socket(AF_INET6, SOCK_STREAM, 0);
+			if (*cur_fd == -1)
+				error(1, errno,
+				      "lfds[%u,%u] = socket(AF_INET6)", i, j);
+			err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT,
+					 &optvalue, sizeof(optvalue));
+			if (err)
+				error(1, errno,
+				      "setsockopt(lfds[%u,%u], SO_REUSEPORT)",
+				      i, j);
+			err = bind(*cur_fd, (struct sockaddr *)&sa6,
+				   sizeof(sa6));
+			if (err)
+				error(1, errno, "bind(lfds[%u,%u])", i, j);
+			cur_fd++;
+		}
+		sa6.sin6_addr.s6_addr32[3]++;	/* move on to the next VIP */
+	}
+
+	return lfds;
+}
+
+/* Bind nr_vips * nr_socks_per_vip sockets, then time listen() on all of them. */
+int main(int argc, const char *argv[])
+{
+	struct timespec start_ts, end_ts;
+	unsigned long start_ns, end_ns, elapsed_ns;
+	unsigned int nr_lsocks, i;
+	int *lfds, err;
+
+	if (argc != 3 || atoi(argv[1]) <= 0 || atoi(argv[2]) <= 0)
+		error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n", argv[0]);
+
+	nr_vips = atoi(argv[1]);
+	nr_socks_per_vip = atoi(argv[2]);
+	nr_lsocks = nr_vips * nr_socks_per_vip;
+	lfds = bind_reuseport_sock6();
+
+	/* Only the listen() calls are timed; socket()/bind() are done above. */
+	clock_gettime(CLOCK_MONOTONIC, &start_ts);
+	for (i = 0; i < nr_lsocks; i++) {
+		err = listen(lfds[i], 0);
+		if (err)
+			error(1, errno, "listen(lfds[%u])", i);
+	}
+	clock_gettime(CLOCK_MONOTONIC, &end_ts);
+
+	start_ns = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec;
+	end_ns = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec;
+	elapsed_ns = end_ns - start_ns;
+	printf("listen %u socks took %lu.%06lu\n", nr_lsocks,
+	       elapsed_ns / NSEC_PER_SEC,	/* whole seconds */
+	       (elapsed_ns % NSEC_PER_SEC) / NSEC_PER_USEC);	/* microseconds */
+
+	for (i = 0; i < nr_lsocks; i++)
+		close(lfds[i]);
+	free(lfds);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh
new file mode 100755
index 000000000000..4de11da4092b
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+
+NS='stress_reuseport_listen_ns'
+NR_FILES=24100	# fd limit applied in setup; above the 24000 sockets opened below
+SAVED_NR_FILES=$(ulimit -n)	# limit in effect before the test, restored by cleanup
+
+setup() {
+	ip netns add $NS
+	ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1	# no VIP address is configured on any interface
+	ulimit -n $NR_FILES
+}
+
+cleanup() {
+	ip netns del $NS
+	ulimit -n $SAVED_NR_FILES
+}
+
+trap cleanup EXIT	# registered before setup, so cleanup also runs if the test binary fails
+setup
+# 300 different VIPs listen on port 443
+# Each VIP:443 sockaddr has 80 listening sockets, sharing it through SO_REUSEPORT
+# Total: 24000 listening sockets
+ip netns exec $NS ./stress_reuseport_listen 300 80
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 865d53c1781c..417d214264f3 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -14,6 +14,8 @@ INIT_NETNS_NAME="init"
 
 PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
 
+TESTS="init testns mix"
+
 log_test()
 {
 	local rc=$1
@@ -262,6 +264,8 @@ cleanup()
 
 vrf_strict_mode_tests_init()
 {
+	log_section "VRF strict_mode test on init network namespace"
+
 	vrf_strict_mode_check_support init
 
 	strict_mode_check_default init
@@ -292,6 +296,8 @@ vrf_strict_mode_tests_init()
 
 vrf_strict_mode_tests_testns()
 {
+	log_section "VRF strict_mode test on testns network namespace"
+
 	vrf_strict_mode_check_support testns
 
 	strict_mode_check_default testns
@@ -318,6 +324,8 @@ vrf_strict_mode_tests_testns()
 
 vrf_strict_mode_tests_mix()
 {
+	log_section "VRF strict_mode test mixing init and testns network namespaces"
+
 	read_strict_mode_compare_and_check init 1
 
 	read_strict_mode_compare_and_check testns 0
@@ -341,18 +349,30 @@ vrf_strict_mode_tests_mix()
 	read_strict_mode_compare_and_check testns 0
 }
 
-vrf_strict_mode_tests()
-{
-	log_section "VRF strict_mode test on init network namespace"
-	vrf_strict_mode_tests_init
+################################################################################
+# usage
 
-	log_section "VRF strict_mode test on testns network namespace"
-	vrf_strict_mode_tests_testns
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
 
-	log_section "VRF strict_mode test mixing init and testns network namespaces"
-	vrf_strict_mode_tests_mix
+	-t <test>	Test(s) to run (default: all)
+			(options: $TESTS)
+EOF
 }
 
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
 vrf_strict_mode_check_support()
 {
 	local nsname=$1
@@ -391,7 +411,17 @@ fi
 cleanup &> /dev/null
 
 setup
-vrf_strict_mode_tests
+for t in $TESTS
+do
+	case $t in
+	vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;;
+	vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;;
+	vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+
+	esac
+done
 cleanup
 
 print_log_test_results
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
index 695a1958723f..fd76b69635a4 100755
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -66,6 +66,20 @@ table inet filter {
 EOF
 }
 
+load_pbr_ruleset() {
+	local netns=$1
+	# Feed nft in ${netns} a forward-hook chain: accept if "fib saddr . iif oif" is > 0, otherwise log and drop
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+	chain forward {
+		type filter hook forward priority raw;
+		fib saddr . iif oif gt 0 accept
+		log drop
+	}
+}
+EOF
+}
+
 load_ruleset_count() {
 	local netns=$1
 
@@ -219,4 +233,40 @@ sleep 2
 ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
 check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
 
+# delete all rules in ns1, ns2 and the router so the next check starts from an empty ruleset
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# load the policy-based-routing (pbr) ruleset on the router; its rule checks fib saddr . iif oif
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+	echo "SKIP: Could not load fib forward ruleset"
+	exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# delete the IPv4 policy rule that points at table main (the table itself is left untouched)
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+	ip netns exec ${nsrouter} nft list ruleset	# nft is a program, not an ip(8) object: run it inside the netns
+	echo "FAIL: fib mismatch in pbr setup"
+	exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
 exit 0
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 19515dcb7d04..f50778a3d744 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -40,6 +40,7 @@ ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
 ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
 ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
 ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
+ALL_TESTS="$ALL_TESTS 0008:1:1:match_int"
 
 function allow_user_defaults()
 {
@@ -785,6 +786,27 @@ sysctl_test_0007()
 	return $ksft_skip
 }
 
+sysctl_test_0008()
+{
+	# Pass only when ${SYSCTL}/match_int reads back as exactly 1; skip when the file is absent
+	TARGET="${SYSCTL}/match_int"
+	if [ ! -f "$TARGET" ]; then
+		echo "Skipping test for $TARGET as it is not present ..."
+		return $ksft_skip
+	fi
+	echo -n "Testing if $TARGET is matched in kernel"
+	ORIG_VALUE=$(cat "${TARGET}")
+	# String compare: with -ne an empty or non-numeric read makes [ error out and the test falsely pass
+	if [ "$ORIG_VALUE" != "1" ]; then
+		echo "TEST FAILED"
+		rc=1
+		test_rc
+	fi
+
+	echo "ok"
+	return 0
+}
+
 list_tests()
 {
 	echo "Test ID list:"
@@ -800,6 +822,7 @@ list_tests()
 	echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
 	echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
 	echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
+	echo "0008 x $(get_test_count 0008) - tests sysctl macro values match"
 }
 
 usage()