Diffstat (limited to 'tools/testing/selftests/bpf')
-rw-r--r--tools/testing/selftests/bpf/.gitignore6
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch6412
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x1
-rw-r--r--tools/testing/selftests/bpf/Makefile248
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs6
-rw-r--r--tools/testing/selftests/bpf/bench.c38
-rw-r--r--tools/testing/selftests/bpf/bench.h1
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c3
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c555
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c65
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_sockmap.c599
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c107
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh4
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh4
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h6
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_strsearch.h128
-rw-r--r--tools/testing/selftests/bpf/bpf_atomic.h140
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h79
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h22
-rw-r--r--tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/Makefile20
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h3
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.c8
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c41
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h5
-rw-r--r--tools/testing/selftests/bpf/config19
-rw-r--r--tools/testing/selftests/bpf/config.aarch6413
-rw-r--r--tools/testing/selftests/bpf/config.ppc64el92
-rw-r--r--tools/testing/selftests/bpf/config.riscv641
-rw-r--r--tools/testing/selftests/bpf/config.s390x12
-rw-r--r--tools/testing/selftests/bpf/config.x86_646
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c (renamed from tools/testing/selftests/bpf/test_lpm_map.c)405
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c62
-rw-r--r--tools/testing/selftests/bpf/map_tests/task_storage_map.c4
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c193
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h136
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c189
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_strsearch.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c112
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_gotox.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c504
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c231
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_distill.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c267
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c143
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_sysfs.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c617
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c122
-rw-r--r--tools/testing/selftests/bpf/prog_tests/compute_live_registers.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_array.c441
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c192
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/file_reader.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c329
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c797
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/free_timer.c169
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kernel_flag.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c260
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c107
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c540
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_excl.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/missed.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c140
-rw-r--r--tools/testing/selftests/bpf/prog_tests/net_timestamping.c239
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c22
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning_htab.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prepare.c99
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pro_epilogue.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_null.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursive_attach.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/res_spin_lock.c117
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c69
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sha256.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c292
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c885
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_helpers.h473
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c229
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h408
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c389
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c459
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_redir.c465
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_strp.c454
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stream.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/string_kfuncs.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/summarization.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_data.h386
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_work_stress.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_change_tail.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c390
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_btf_ext.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_sysctl.c (renamed from tools/testing/selftests/bpf/test_sysctl.c)37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_local_data.c297
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_task_work.c157
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_edt.c145
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c714
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c590
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_veristat.c261
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c638
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.c2596
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_xsk.h298
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c182
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c156
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c486
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c413
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c39
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c179
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c179
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_vlan.c175
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xsk.c151
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c124
-rw-r--r--tools/testing/selftests/bpf/progs/arena_spin_lock.c54
-rw-r--r--tools/testing/selftests/bpf/progs/arena_strsearch.c146
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops.c2
-rw-r--r--tools/testing/selftests/bpf/progs/bench_sockmap_prog.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h542
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c11
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c7
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_gotox.c448
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c110
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c4
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h86
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_common.h27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c126
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c756
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_smc.c117
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_test_utils.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h20
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c22
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c9
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_mprog.c30
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_preorder.c41
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_read_xattr.c158
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c15
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c12
-rw-r--r--tools/testing/selftests/bpf/progs/compute_live_registers.c440
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c4
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c21
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h10
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h1
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c38
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c120
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c46
-rw-r--r--tools/testing/selftests/bpf/progs/dmabuf_iter.c101
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c286
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c578
-rw-r--r--tools/testing/selftests/bpf/progs/epilogue_exit.c4
-rw-r--r--tools/testing/selftests/bpf/progs/epilogue_tailcall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c34
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/fd_htab_lookup.c25
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader.c145
-rw-r--r--tools/testing/selftests/bpf/progs/file_reader_fail.c52
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma.c2
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_modify.c30
-rw-r--r--tools/testing/selftests/bpf/progs/free_timer.c71
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/htab_update.c19
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c5
-rw-r--r--tools/testing/selftests/bpf/progs/irq.c566
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c454
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c20
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c4
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod.c48
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c10
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_destructive.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_race.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_write_ctx.c22
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c5
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_peek.c113
-rw-r--r--tools/testing/selftests/bpf/progs/livepatch_trampoline.c30
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c2
-rw-r--r--tools/testing/selftests/bpf/progs/loop1.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop2.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c7
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie.h30
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_bench.c230
-rw-r--r--tools/testing/selftests/bpf/progs/lpm_trie_map.c19
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c8
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_tailcall.c8
-rw-r--r--tools/testing/selftests/bpf/progs/map_excl.c34
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c229
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe.c2
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c8
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sockmap.c43
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_subflow.c2
-rw-r--r--tools/testing/selftests/bpf/progs/nested_acquire.c2
-rw-r--r--tools/testing/selftests/bpf/progs/net_timestamping.c248
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c9
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c96
-rw-r--r--tools/testing/selftests/bpf/progs/prepare.c27
-rw-r--r--tools/testing/selftests/bpf/progs/priv_freplace_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue.c4
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c4
-rw-r--r--tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c88
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null.c21
-rw-r--r--tools/testing/selftests/bpf/progs/raw_tp_null_fail.c24
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c14
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c29
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_search.c206
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c106
-rw-r--r--tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c4
-rw-r--r--tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c11
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c60
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock.c147
-rw-r--r--tools/testing/selftests/bpf/progs/res_spin_lock_fail.c244
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c11
-rw-r--r--tools/testing/selftests/bpf/progs/security_bpf_map.c69
-rw-r--r--tools/testing/selftests/bpf/progs/set_global_vars.c106
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c14
-rw-r--r--tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c104
-rw-r--r--tools/testing/selftests/bpf/progs/sock_addr_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c52
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_ips.c49
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map.c (renamed from tools/testing/selftests/bpf/progs/test_stacktrace_map.c)2
-rw-r--r--tools/testing/selftests/bpf/progs/stream.c237
-rw-r--r--tools/testing/selftests/bpf/progs/stream_fail.c33
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c105
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c26
-rw-r--r--tools/testing/selftests/bpf/progs/string_kfuncs_success.c56
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_bench.c5
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h6
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_detach.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c59
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c26
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c34
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c25
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c30
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c2
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c4
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted.c31
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c22
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c36
-rw-r--r--tools/testing/selftests/bpf/progs/summarization.c78
-rw-r--r--tools/testing/selftests/bpf/progs/summarization_freplace.c33
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c6
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c3
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c3
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_data.bpf.h237
-rw-r--r--tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c4
-rw-r--r--tools/testing/selftests/bpf/progs/task_work.c107
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_fail.c96
-rw-r--r--tools/testing/selftests/bpf/progs/task_work_stress.c73
-rw-r--r--tools/testing/selftests/bpf/progs/tc_bpf2bpf.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_ext.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h2
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_fill_link_info.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_kernel_flag.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_branches.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_devmap.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_htab.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c98
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_set_remove_xattr.c133
-rw-r--r--tools/testing/selftests/bpf/progs/test_sig_in_xattr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_ktls.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_redir.c68
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_strp.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c69
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_local_data.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_change_tail.c106
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c95
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c658
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_pull_data.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_redirect.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c20
-rw-r--r--tools/testing/selftests/bpf/progs/timer_interrupt.c48
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c12
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c33
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c18
-rw-r--r--tools/testing/selftests/bpf/progs/uninit_stack.c5
-rw-r--r--tools/testing/selftests/bpf/progs/unsupported_ops.c2
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c4
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c60
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_and.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c106
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c99
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_array_access.c203
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_async_cb_context.c181
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_basic_stack.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bits_iter.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c695
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c11
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c73
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_trap.c71
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c52
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_const_or.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c76
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_d_path.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_overflow.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c128
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotox.c389
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_int_ptr.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c178
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_live_stack.c344
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_load_acquire.c234
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c21
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lsm.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_in_map.c120
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_1.c109
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_may_goto_2.c28
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c16
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mtu.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_mul.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c5
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c179
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_private_stack.c95
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_raw_stack.c4
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ref_tracking.c2
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c12
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c139
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c75
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spin_lock.c28
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c52
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_store_release.c301
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c59
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_tailcall.c31
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c235
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c47
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c38
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c14
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_accept.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_vfs_reject.c15
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c19
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c25
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c13
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_map.c88
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c41
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c50
-rw-r--r--tools/testing/selftests/bpf/sdt.h2
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_map.sh398
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py28
-rw-r--r--tools/testing/selftests/bpf/test_btf.h6
-rw-r--r--tools/testing/selftests/bpf/test_flow_dissector.c780
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh178
-rw-r--r--tools/testing/selftests/bpf/test_kmods/.gitignore (renamed from tools/testing/selftests/bpf/bpf_testmod/.gitignore)0
-rw-r--r--tools/testing/selftests/bpf/test_kmods/Makefile21
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c (renamed from tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c)0
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c (renamed from tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c)0
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c (renamed from tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c)0
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c393
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h (renamed from tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h)0
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.c (renamed from tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c)322
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod.h (renamed from tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h)12
-rw-r--r--tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h (renamed from tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h)4
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c2
-rw-r--r--tools/testing/selftests/bpf/test_loader.c443
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c105
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh476
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh156
-rw-r--r--tools/testing/selftests/bpf/test_maps.c14
-rw-r--r--tools/testing/selftests/bpf/test_progs.c100
-rw-r--r--tools/testing/selftests/bpf/test_progs.h68
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c8
-rw-r--r--tools/testing/selftests/bpf/test_tag.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh100
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh319
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c20
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh645
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh58
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh79
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh214
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh233
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh9
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh2
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c14
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c238
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h3
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.c94
-rw-r--r--tools/testing/selftests/bpf/usdt.h545
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c34
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c33
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c2
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh11
-rw-r--r--tools/testing/selftests/bpf/veristat.c1117
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh9
-rwxr-xr-xtools/testing/selftests/bpf/with_addr.sh54
-rwxr-xr-xtools/testing/selftests/bpf/with_tunnels.sh36
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c173
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rw-r--r--tools/testing/selftests/bpf/xdping.c2
-rw-r--r--tools/testing/selftests/bpf/xsk.h4
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h1
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2370
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h153
549 files changed, 44329 insertions, 9607 deletions
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c2a1842c3d8b..19c1638e312a 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -5,7 +5,6 @@ bpf-syscall*
test_verifier
test_maps
test_lru_map
-test_lpm_map
test_tag
FEATURE-DUMP.libbpf
FEATURE-DUMP.selftests
@@ -19,13 +18,12 @@ feature
urandom_read
test_sockmap
test_lirc_mode2_user
-test_flow_dissector
flow_dissector_load
test_tcpnotify_user
test_libbpf
-test_sysctl
xdping
test_cpp
+test_progs_verification_cert
*.d
*.subskel.h
*.skel.h
@@ -35,7 +33,6 @@ test_cpp
/cpuv4
/host-tools
/tools
-/runqslower
/bench
/veristat
/sign-file
@@ -47,3 +44,4 @@ xdp_redirect_multi
xdp_synproxy
xdp_hw_metadata
xdp_features
+verification_cert.h
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
deleted file mode 100644
index 901349da680f..000000000000
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ /dev/null
@@ -1,12 +0,0 @@
-bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-kprobe_multi_bench_attach # needs CONFIG_FPROBE
-kprobe_multi_test # needs CONFIG_FPROBE
-module_attach # prog 'kprobe_multi': failed to auto-attach: -95
-fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
-fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
-tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
-fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
-missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 3ebd77206f98..a17baf8c6fd7 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -2,4 +2,3 @@
# Alphabetical order
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
-verifier_iterating_callbacks
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6ad3b1ba1920..b7030a6e2e76 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -34,6 +34,9 @@ OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+SKIP_DOCS ?=
+SKIP_LLVM ?=
+
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(CURDIR)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
@@ -41,8 +44,9 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
srctree := $(patsubst %/,%,$(dir $(srctree)))
endif
-CFLAGS += -g $(OPT_FLAGS) -rdynamic \
+CFLAGS += -g $(OPT_FLAGS) -rdynamic -std=gnu11 \
-Wall -Werror -fno-omit-frame-pointer \
+ -Wno-unused-but-set-variable \
$(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(TOOLSARCHINCDIR) -I$(APIDIR) -I$(OUTPUT)
@@ -54,21 +58,6 @@ PCAP_LIBS := $(shell $(PKG_CONFIG) --libs libpcap 2>/dev/null)
LDLIBS += $(PCAP_LIBS)
CFLAGS += $(PCAP_CFLAGS)
-# The following tests perform type punning and they may break strict
-# aliasing rules, which are exploited by both GCC and clang by default
-# while optimizing. This can lead to broken programs.
-progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing
-progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing
-progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing
-progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing
-progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing
-progs/syscall.c-CFLAGS := -fno-strict-aliasing
-progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
-progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
-progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
-progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing
-progs/verifier_nocsr.c-CFLAGS := -fno-strict-aliasing
-
# Some utility functions use LLVM libraries
jit_disasm_helpers.c-CFLAGS = $(LLVM_CFLAGS)
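
For reference, the strict-aliasing hazard these per-program flags guarded against looks roughly like the sketch below (illustrative C only, not code from the selftests); the patch drops the per-file flags and instead adds -fno-strict-aliasing globally to BPF_CFLAGS further down.

#include <stdint.h>
#include <stdio.h>

/* Reading a float's bits through a uint32_t * violates C's effective-type
 * ("strict aliasing") rules.  At -O2, GCC and clang may assume f and *bits
 * never alias and reorder or drop accesses, producing wrong results.
 * Building with -fno-strict-aliasing forbids that assumption.
 */
static uint32_t float_bits_punned(float f)
{
	uint32_t *bits = (uint32_t *)&f;	/* type punning via pointer cast */

	return *bits;
}

/* Copying through a union (or memcpy()) is the well-defined alternative. */
static uint32_t float_bits_safe(float f)
{
	union { float f; uint32_t u; } pun = { .f = f };

	return pun.u;
}

int main(void)
{
	printf("%x %x\n", (unsigned)float_bits_punned(1.0f),
	       (unsigned)float_bits_safe(1.0f));
	return 0;
}
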
@@ -83,9 +72,9 @@ CLANG_CPUV4 := 1
endif
# Order correspond to 'make run_tests' order
-TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
+TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_progs \
test_sockmap \
- test_tcpnotify_user test_sysctl \
+ test_tcpnotify_user \
test_progs-no_alu32
TEST_INST_SUBDIRS := no_alu32
@@ -103,18 +92,6 @@ progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error
progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error
progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error
-# The following tests do type-punning, via the __imm_insn macro, from
-# `struct bpf_insn' to long and then uses the value. This triggers an
-# "is used uninitialized" warning in GCC due to strict-aliasing
-# rules.
-progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
-progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
endif
ifneq ($(CLANG_CPUV4),)
@@ -122,56 +99,42 @@ TEST_GEN_PROGS += test_progs-cpuv4
TEST_INST_SUBDIRS += cpuv4
endif
-TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
- test_xdp_redirect.sh \
- test_xdp_redirect_multi.sh \
- test_xdp_meta.sh \
- test_tunnel.sh \
- test_lwt_seg6local.sh \
test_lirc_mode2.sh \
- test_flow_dissector.sh \
- test_xdp_vlan_mode_generic.sh \
- test_xdp_vlan_mode_native.sh \
- test_lwt_ip_encap.sh \
- test_tc_tunnel.sh \
- test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
test_bpftool.sh \
+ test_bpftool_map.sh \
test_bpftool_metadata.sh \
test_doc_build.sh \
test_xsk.sh \
test_xdp_features.sh
-TEST_PROGS_EXTENDED := with_addr.sh \
- with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
- test_xdp_vlan.sh test_bpftool.py
+TEST_PROGS_EXTENDED := \
+ ima_setup.sh verify_sig_setup.sh \
+ test_bpftool.py
+
+TEST_KMODS := bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
+TEST_KMOD_TARGETS = $(addprefix $(OUTPUT)/,$(TEST_KMODS))
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = \
bench \
- bpf_testmod.ko \
- bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko \
- bpf_test_no_cfi.ko \
flow_dissector_load \
- runqslower \
test_cpp \
- test_flow_dissector \
test_lirc_mode2_user \
veristat \
xdp_features \
xdp_hw_metadata \
- xdp_redirect_multi \
xdp_synproxy \
xdping \
xskxceiver
-TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
+TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi
ifneq ($(V),1)
submake_extras := feature_display=0
@@ -184,8 +147,9 @@ override define CLEAN
$(Q)$(RM) -r $(TEST_GEN_PROGS)
$(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED)
$(Q)$(RM) -r $(TEST_GEN_FILES)
+ $(Q)$(RM) -r $(TEST_KMODS)
$(Q)$(RM) -r $(EXTRA_CLEAN)
- $(Q)$(MAKE) -C bpf_testmod clean
+ $(Q)$(MAKE) -C test_kmods clean
$(Q)$(MAKE) docs-clean
endef
@@ -203,22 +167,29 @@ ifeq ($(shell expr $(MAKE_VERSION) \>= 4.4), 1)
$(let OUTPUT,$(OUTPUT)/,\
$(eval include ../../../build/Makefile.feature))
else
-OUTPUT := $(OUTPUT)/
+override OUTPUT := $(OUTPUT)/
$(eval include ../../../build/Makefile.feature)
-OUTPUT := $(patsubst %/,%,$(OUTPUT))
+override OUTPUT := $(patsubst %/,%,$(OUTPUT))
endif
endif
+ifneq ($(SKIP_LLVM),1)
ifeq ($(feature-llvm),1)
LLVM_CFLAGS += -DHAVE_LLVM_SUPPORT
LLVM_CONFIG_LIB_COMPONENTS := mcdisassembler all-targets
# both llvm-config and lib.mk add -D_GNU_SOURCE, which ends up as conflict
LLVM_CFLAGS += $(filter-out -D_GNU_SOURCE,$(shell $(LLVM_CONFIG) --cflags))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS))
- LLVM_LDLIBS += -lstdc++
+ # Prefer linking statically if it's available, otherwise fallback to shared
+ ifeq ($(shell $(LLVM_CONFIG) --link-static --libs >/dev/null 2>&1 && echo static),static)
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-static --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ LLVM_LDLIBS += $(filter-out -lxml2,$(shell $(LLVM_CONFIG) --link-static --system-libs $(LLVM_CONFIG_LIB_COMPONENTS)))
+ LLVM_LDLIBS += -lstdc++
+ else
+ LLVM_LDLIBS += $(shell $(LLVM_CONFIG) --link-shared --libs $(LLVM_CONFIG_LIB_COMPONENTS))
+ endif
LLVM_LDFLAGS += $(shell $(LLVM_CONFIG) --ldflags)
endif
+endif
SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
@@ -235,8 +206,6 @@ HOST_INCLUDE_DIR := $(INCLUDE_DIR)
endif
HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
-RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
-
VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
$(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
../../../../vmlinux \
@@ -247,18 +216,18 @@ ifeq ($(VMLINUX_BTF),)
$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
endif
-# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# Define simple and short `make test_progs`, `make test_maps`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
-$(notdir $(TEST_GEN_PROGS) \
+$(notdir $(TEST_GEN_PROGS) $(TEST_KMODS) \
$(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ;
# sort removes libbpf duplicates when not cross-building
MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
$(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \
$(HOST_BUILD_DIR)/resolve_btfids \
- $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
+ $(INCLUDE_DIR))
$(MAKE_DIRS):
$(call msg,MKDIR,,$@)
$(Q)mkdir -p $@
@@ -305,37 +274,19 @@ $(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
$< -o $@ \
$(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
-$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
- $(call msg,MOD,,$@)
- $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
- $(Q)$(MAKE) $(submake_extras) -C bpf_testmod \
- RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \
+# This should really be a grouped target, but make versions before 4.3 don't
+# support that for regular rules. However, pattern matching rules are implicitly
+# treated as grouped even with older versions of make, so as a workaround, the
+# subst() turns the rule into a pattern matching rule
+$(addprefix test_kmods/,$(subst .ko,%ko,$(TEST_KMODS))): $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard test_kmods/Makefile test_kmods/*.[ch])
+ $(Q)$(RM) test_kmods/*.ko test_kmods/*.mod.o # force re-compilation
+ $(Q)$(MAKE) $(submake_extras) -C test_kmods \
+ RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \
EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
- $(Q)cp bpf_testmod/bpf_testmod.ko $@
-$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch])
+$(TEST_KMOD_TARGETS): $(addprefix test_kmods/,$(TEST_KMODS))
$(call msg,MOD,,$@)
- $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation
- $(Q)$(MAKE) $(submake_extras) -C bpf_test_no_cfi \
- RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \
- EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
- $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@
-
-$(OUTPUT)/bpf_test_modorder_x.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_x/Makefile bpf_test_modorder_x/*.[ch])
- $(call msg,MOD,,$@)
- $(Q)$(RM) bpf_test_modorder_x/bpf_test_modorder_x.ko # force re-compilation
- $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_x \
- RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \
- EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
- $(Q)cp bpf_test_modorder_x/bpf_test_modorder_x.ko $@
-
-$(OUTPUT)/bpf_test_modorder_y.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_modorder_y/Makefile bpf_test_modorder_y/*.[ch])
- $(call msg,MOD,,$@)
- $(Q)$(RM) bpf_test_modorder_y/bpf_test_modorder_y.ko # force re-compilation
- $(Q)$(MAKE) $(submake_extras) -C bpf_test_modorder_y \
- RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) \
- EXTRA_CFLAGS='' EXTRA_LDFLAGS=''
- $(Q)cp bpf_test_modorder_y/bpf_test_modorder_y.ko $@
+ $(Q)cp test_kmods/$(@F) $@
DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
@@ -348,16 +299,6 @@ TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
USE_BOOTSTRAP := "bootstrap/"
endif
-$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
- $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
- OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
- BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
- EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS) $(EXTRA_CFLAGS)' \
- EXTRA_LDFLAGS='$(SAN_LDFLAGS) $(EXTRA_LDFLAGS)' && \
- cp $(RUNQSLOWER_OUTPUT)runqslower $@
-
TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
@@ -373,7 +314,6 @@ NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
-$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tag: $(TESTING_HELPERS)
$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
$(OUTPUT)/xdping: $(TESTING_HELPERS)
@@ -407,7 +347,9 @@ $(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
endif
+ifneq ($(SKIP_DOCS),1)
all: docs
+endif
docs:
$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
@@ -440,7 +382,7 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-# vmlinux.h is first dumped to a temprorary file and then compared to
+# vmlinux.h is first dumped to a temporary file and then compared to
# the previous version. This helps to avoid unnecessary re-builds of
# $(TRUNNER_BPF_OBJS)
$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
@@ -480,10 +422,10 @@ $(shell $(1) $(2) -dM -E - </dev/null | grep -E 'MIPS(EL|EB)|_MIPS_SZ(PTR|LONG)
endef
# Determine target endianness.
-IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+IS_LITTLE_ENDIAN := $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
-MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
-BPF_TARGET_ENDIAN=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb)
+MENDIAN:=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+BPF_TARGET_ENDIAN:=$(if $(IS_LITTLE_ENDIAN),--target=bpfel,--target=bpfeb)
ifneq ($(CROSS_COMPILE),)
CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
@@ -493,7 +435,11 @@ CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
-I$(abspath $(OUTPUT)/../usr/include) \
- -Wno-compare-distinct-pointer-types
+ -std=gnu11 \
+ -fno-strict-aliasing \
+ -Wno-compare-distinct-pointer-types \
+ -Wno-initializer-overrides \
+ #
# TODO: enable me -Wsign-compare
CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
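
The -Wno-initializer-overrides addition silences clang's warning about a later designated initializer overriding an earlier one, a pattern that appears when a macro supplies default field values which a use site then overrides. A minimal illustrative C sketch (an assumed example, not taken from the selftests):

struct opts {
	int retries;
	int timeout_ms;
};

/* Defaults come from a macro; the use site deliberately overrides one
 * field.  The duplicate .timeout_ms designator is what clang reports
 * under -Winitializer-overrides.
 */
#define DEFAULT_OPTS .retries = 3, .timeout_ms = 1000

struct opts slow_path_opts = {
	DEFAULT_OPTS,
	.timeout_ms = 5000,
};
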
@@ -536,15 +482,17 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
test_subskeleton.skel.h test_subskeleton_lib.skel.h \
test_usdt.skel.h
-LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
- trace_printk.c trace_vprintk.c map_ptr_kern.c \
+LSKELS := fexit_sleep.c trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c \
+ test_ringbuf_overwrite.c
+
+LSKELS_SIGNED := fentry_test.c fexit_test.c atomics.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
kfunc_call_test_subprog.c
-SKEL_BLACKLIST += $$(LSKELS)
+SKEL_BLACKLIST += $$(LSKELS) $$(LSKELS_SIGNED)
test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
@@ -575,12 +523,15 @@ HEADERS_FOR_BPF_OBJS := $(wildcard $(BPFDIR)/*.bpf.h) \
# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
+LSKEL_SIGN := -S -k $(PRIVATE_KEY) -i $(VERIFICATION_CERT)
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
TRUNNER_BINARY := $1$(if $2,-)$2
TRUNNER_TEST_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.test.o, \
$$(notdir $$(wildcard $(TRUNNER_TESTS_DIR)/*.c)))
TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
$$(filter %.c,$(TRUNNER_EXTRA_SOURCES)))
+TRUNNER_LIB_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
+ $$(filter %.c,$(TRUNNER_LIB_SOURCES)))
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
@@ -590,6 +541,7 @@ TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
$$(TRUNNER_BPF_SRCS)))
TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
+TRUNNER_BPF_LSKELS_SIGNED := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS_SIGNED))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -644,6 +596,15 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
$(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+$(TRUNNER_BPF_LSKELS_SIGNED): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY) (signed),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $(LSKEL_SIGN) $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
$(LINKED_BPF_OBJS): %: $(TRUNNER_OUTPUT)/%
# .SECONDEXPANSION here allows to correctly expand %-deps variables as prerequisites
@@ -693,6 +654,7 @@ $(TRUNNER_TEST_OBJS:.o=.d): $(TRUNNER_OUTPUT)/%.test.d: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_SKELS) \
$(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_LSKELS_SIGNED) \
$(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
@@ -707,11 +669,16 @@ $(foreach N,$(patsubst $(TRUNNER_OUTPUT)/%.o,%,$(TRUNNER_EXTRA_OBJS)), \
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
$(TRUNNER_EXTRA_HDRS) \
+ $(VERIFY_SIG_HDR) \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+$(TRUNNER_LIB_OBJS): $(TRUNNER_OUTPUT)/%.o:$(TOOLSDIR)/lib/%.c
+ $$(call msg,LIB-OBJ,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+
# non-flavored in-srctree builds receive special treatment, in particular, we
# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
@@ -720,25 +687,37 @@ ifneq ($2:$(OUTPUT),:$(shell pwd))
$(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
-$(OUTPUT)/$(TRUNNER_BINARY): LDLIBS += $$(LLVM_LDLIBS)
-$(OUTPUT)/$(TRUNNER_BINARY): LDFLAGS += $$(LLVM_LDFLAGS)
-
# some X.test.o files have runtime dependencies on Y.bpf.o files
$(OUTPUT)/$(TRUNNER_BINARY): | $(TRUNNER_BPF_OBJS)
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(TRUNNER_LIB_OBJS) \
$(RESOLVE_BTFIDS) \
$(TRUNNER_BPFTOOL) \
+ $(OUTPUT)/veristat \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LDFLAGS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) $$(LLVM_LDLIBS) $$(LDFLAGS) $$(LLVM_LDFLAGS) -o $$@
$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
$(OUTPUT)/$(if $2,$2/)bpftool
endef
+VERIFY_SIG_SETUP := $(CURDIR)/verify_sig_setup.sh
+VERIFY_SIG_HDR := verification_cert.h
+VERIFICATION_CERT := $(BUILD_DIR)/signing_key.der
+PRIVATE_KEY := $(BUILD_DIR)/signing_key.pem
+
+$(VERIFICATION_CERT) $(PRIVATE_KEY): $(VERIFY_SIG_SETUP)
+ $(Q)mkdir -p $(BUILD_DIR)
+ $(Q)$(VERIFY_SIG_SETUP) genkey $(BUILD_DIR)
+
+$(VERIFY_SIG_HDR): $(VERIFICATION_CERT)
+ $(Q)ln -fs $< test_progs_verification_cert && \
+ xxd -i test_progs_verification_cert > $@
+
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
@@ -758,18 +737,18 @@ TRUNNER_EXTRA_SOURCES := test_progs.c \
disasm.c \
disasm_helpers.c \
json_writer.c \
+ $(VERIFY_SIG_HDR) \
flow_dissector_load.h \
ip_check_defrag_frags.h
-TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
- $(OUTPUT)/bpf_test_no_cfi.ko \
- $(OUTPUT)/bpf_test_modorder_x.ko \
- $(OUTPUT)/bpf_test_modorder_y.ko \
+TRUNNER_LIB_SOURCES := find_bit.c
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
$(OUTPUT)/liburandom_read.so \
$(OUTPUT)/xdp_synproxy \
$(OUTPUT)/sign-file \
$(OUTPUT)/uprobe_multi \
+ $(TEST_KMOD_TARGETS) \
ima_setup.sh \
- verify_sig_setup.sh \
+ $(VERIFY_SIG_SETUP) \
$(wildcard progs/btf_dump_test_case_*.c) \
$(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
@@ -799,6 +778,7 @@ endif
TRUNNER_TESTS_DIR := map_tests
TRUNNER_BPF_PROGS_DIR := progs
TRUNNER_EXTRA_SOURCES := test_maps.c
+TRUNNER_LIB_SOURCES :=
TRUNNER_EXTRA_FILES :=
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
TRUNNER_BPF_CFLAGS :=
@@ -820,7 +800,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Include find_bit.c to compile xskxceiver.
-EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c prog_tests/test_xsk.c prog_tests/test_xsk.h
$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -834,9 +814,12 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
+CXXFLAGS += $(CFLAGS)
+CXXFLAGS := $(subst -D_GNU_SOURCE=,,$(CXXFLAGS))
+CXXFLAGS := $(subst -std=gnu11,-std=gnu++11,$(CXXFLAGS))
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CXXFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
@@ -856,6 +839,8 @@ $(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.ske
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
+$(OUTPUT)/bench_sockmap.o: $(OUTPUT)/bench_sockmap_prog.skel.h
+$(OUTPUT)/bench_lpm_trie_map.o: $(OUTPUT)/lpm_trie_bench.skel.h $(OUTPUT)/lpm_trie_map.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -876,10 +861,17 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
$(OUTPUT)/bench_bpf_crypto.o \
+ $(OUTPUT)/bench_sockmap.o \
+ $(OUTPUT)/bench_lpm_trie_map.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+# This works around GCC warning about snprintf truncating strings like:
+#
+# char a[PATH_MAX], b[PATH_MAX];
+# snprintf(a, sizeof(a), "%s/foo", b); // triggers -Wformat-truncation
+$(OUTPUT)/veristat.o: CFLAGS += -Wno-format-truncation
$(OUTPUT)/veristat.o: $(BPFOBJ)
$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(call msg,BINARY,,$@)
@@ -894,14 +886,12 @@ $(OUTPUT)/uprobe_multi: uprobe_multi.c uprobe_multi.ld
EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
- feature bpftool \
+ feature bpftool $(TEST_KMOD_TARGETS) \
$(addprefix $(OUTPUT)/,*.o *.d *.skel.h *.lskel.h *.subskel.h \
- no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \
- bpf_test_no_cfi.ko \
- bpf_test_modorder_x.ko \
- bpf_test_modorder_y.ko \
+ no_alu32 cpuv4 bpf_gcc \
liburandom_read.so) \
- $(OUTPUT)/FEATURE-DUMP.selftests
+ $(OUTPUT)/FEATURE-DUMP.selftests \
+ test_progs_verification_cert
.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
index eb6a4fea8c79..f7f9e7088bb3 100644
--- a/tools/testing/selftests/bpf/Makefile.docs
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -7,12 +7,6 @@ INSTALL ?= install
RM ?= rm -f
RMDIR ?= rmdir --ignore-fail-on-non-empty
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
prefix ?= /usr/local
mandir ?= $(prefix)/man
man2dir = $(mandir)/man2
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 1bd403a5ef7b..bd29bb2e6cb5 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -283,6 +283,8 @@ extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
extern struct argp bench_trigger_batch_argp;
extern struct argp bench_crypto_argp;
+extern struct argp bench_sockmap_argp;
+extern struct argp bench_lpm_trie_map_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -297,6 +299,8 @@ static const struct argp_child bench_parsers[] = {
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{ &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
{ &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
+ { &bench_sockmap_argp, 0, "bpf sockmap benchmark", 0 },
+ { &bench_lpm_trie_map_argp, 0, "LPM trie map benchmark", 0 },
{},
};
@@ -497,7 +501,7 @@ extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-/* pure counting benchmarks to establish theoretical lmits */
+/* pure counting benchmarks to establish theoretical limits */
extern const struct bench bench_trig_usermode_count;
extern const struct bench bench_trig_syscall_count;
extern const struct bench bench_trig_kernel_count;
@@ -508,6 +512,8 @@ extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_kprobe_multi_all;
+extern const struct bench bench_trig_kretprobe_multi_all;
extern const struct bench bench_trig_fexit;
extern const struct bench bench_trig_fmodret;
extern const struct bench bench_trig_tp;
@@ -526,6 +532,12 @@ extern const struct bench bench_trig_uprobe_multi_push;
extern const struct bench bench_trig_uretprobe_multi_push;
extern const struct bench bench_trig_uprobe_multi_ret;
extern const struct bench bench_trig_uretprobe_multi_ret;
+#ifdef __x86_64__
+extern const struct bench bench_trig_uprobe_nop5;
+extern const struct bench bench_trig_uretprobe_nop5;
+extern const struct bench bench_trig_uprobe_multi_nop5;
+extern const struct bench bench_trig_uretprobe_multi_nop5;
+#endif
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
@@ -549,6 +561,14 @@ extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
extern const struct bench bench_crypto_encrypt;
extern const struct bench bench_crypto_decrypt;
+extern const struct bench bench_sockmap;
+extern const struct bench bench_lpm_trie_noop;
+extern const struct bench bench_lpm_trie_baseline;
+extern const struct bench bench_lpm_trie_lookup;
+extern const struct bench bench_lpm_trie_insert;
+extern const struct bench bench_lpm_trie_update;
+extern const struct bench bench_lpm_trie_delete;
+extern const struct bench bench_lpm_trie_free;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -569,6 +589,8 @@ static const struct bench *benchs[] = {
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_kprobe_multi_all,
+ &bench_trig_kretprobe_multi_all,
&bench_trig_fexit,
&bench_trig_fmodret,
&bench_trig_tp,
@@ -586,6 +608,12 @@ static const struct bench *benchs[] = {
&bench_trig_uretprobe_multi_push,
&bench_trig_uprobe_multi_ret,
&bench_trig_uretprobe_multi_ret,
+#ifdef __x86_64__
+ &bench_trig_uprobe_nop5,
+ &bench_trig_uretprobe_nop5,
+ &bench_trig_uprobe_multi_nop5,
+ &bench_trig_uretprobe_multi_nop5,
+#endif
/* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
@@ -609,6 +637,14 @@ static const struct bench *benchs[] = {
&bench_htab_mem,
&bench_crypto_encrypt,
&bench_crypto_decrypt,
+ &bench_sockmap,
+ &bench_lpm_trie_noop,
+ &bench_lpm_trie_baseline,
+ &bench_lpm_trie_lookup,
+ &bench_lpm_trie_insert,
+ &bench_lpm_trie_update,
+ &bench_lpm_trie_delete,
+ &bench_lpm_trie_free,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index 005c401b3e22..bea323820ffb 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -46,6 +46,7 @@ struct bench_res {
unsigned long gp_ns;
unsigned long gp_ct;
unsigned int stime;
+ unsigned long duration_ns;
};
struct bench {
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
index 926ee822143e..297e32390cd1 100644
--- a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -279,6 +279,7 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
}
got = read(fd, buf, sizeof(buf) - 1);
+ close(fd);
if (got <= 0) {
*value = 0;
return;
@@ -286,8 +287,6 @@ static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
buf[got] = 0;
*value = strtoull(buf, NULL, 0);
-
- close(fd);
}
static void htab_mem_measure(struct bench_res *res)
diff --git a/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
new file mode 100644
index 000000000000..246f6cb3387d
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_lpm_trie_map.c
@@ -0,0 +1,555 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+/*
+ * All of these benchmarks operate on tries with keys in the range
+ * [0, args.nr_entries), i.e. there are no gaps or partially filled
+ * branches of the trie for any key < args.nr_entries.
+ *
+ * This gives an idea of worst-case behaviour.
+ */
+
+#include <argp.h>
+#include <linux/time64.h>
+#include <linux/if_ether.h>
+#include "lpm_trie_bench.skel.h"
+#include "lpm_trie_map.skel.h"
+#include "bench.h"
+#include "testing_helpers.h"
+#include "progs/lpm_trie.h"
+
+static struct ctx {
+ struct lpm_trie_bench *bench;
+} ctx;
+
+static struct {
+ __u32 nr_entries;
+ __u32 prefixlen;
+ bool random;
+} args = {
+ .nr_entries = 0,
+ .prefixlen = 32,
+ .random = false,
+};
+
+enum {
+ ARG_NR_ENTRIES = 9000,
+ ARG_PREFIX_LEN,
+ ARG_RANDOM,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Number of unique entries in the LPM trie" },
+ { "prefix_len", ARG_PREFIX_LEN, "PREFIX_LEN", 0,
+ "Number of prefix bits to use in the LPM trie" },
+ { "random", ARG_RANDOM, NULL, 0, "Access random keys during op" },
+ {},
+};
+
+static error_t lpm_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_PREFIX_LEN:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid prefix_len value.");
+ argp_usage(state);
+ }
+ args.prefixlen = ret;
+ break;
+ case ARG_RANDOM:
+ args.random = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+const struct argp bench_lpm_trie_map_argp = {
+ .options = opts,
+ .parser = lpm_parse_arg,
+};
+
+static void validate_common(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer\n");
+ exit(1);
+ }
+
+ if (args.nr_entries == 0) {
+ fprintf(stderr, "Missing --nr_entries parameter\n");
+ exit(1);
+ }
+
+ if ((1UL << args.prefixlen) < args.nr_entries) {
+ fprintf(stderr, "prefix_len value too small for nr_entries\n");
+ exit(1);
+ }
+}
+
+static void lpm_insert_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-insert requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-insert does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_delete_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-delete requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-delete does not support --random\n");
+ exit(1);
+ }
+}
+
+static void lpm_free_validate(void)
+{
+ validate_common();
+
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "lpm-trie-free requires a single producer\n");
+ exit(1);
+ }
+
+ if (args.random) {
+ fprintf(stderr, "lpm-trie-free does not support --random\n");
+ exit(1);
+ }
+}
+
+static struct trie_key *keys;
+static __u32 *vals;
+
+static void fill_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_update_batch(map_fd, keys, vals, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch update keys to map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void empty_map(int map_fd)
+{
+ int err;
+
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ err = bpf_map_delete_batch(map_fd, keys, &args.nr_entries, &opts);
+ if (err) {
+ fprintf(stderr, "failed to batch delete keys for map: %d\n",
+ -err);
+ exit(1);
+ }
+}
+
+static void attach_prog(void)
+{
+ int i;
+
+ ctx.bench = lpm_trie_bench__open_and_load();
+ if (!ctx.bench) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.bench->bss->nr_entries = args.nr_entries;
+ ctx.bench->bss->prefixlen = args.prefixlen;
+ ctx.bench->bss->random = args.random;
+
+ if (lpm_trie_bench__attach(ctx.bench)) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ exit(1);
+ }
+
+ keys = calloc(args.nr_entries, sizeof(*keys));
+ vals = calloc(args.nr_entries, sizeof(*vals));
+
+ for (i = 0; i < args.nr_entries; i++) {
+ struct trie_key *k = &keys[i];
+ __u32 *v = &vals[i];
+
+ k->prefixlen = args.prefixlen;
+ k->data = i;
+ *v = 1;
+ }
+}
+
+static void attach_prog_and_fill_map(void)
+{
+ int fd;
+
+ attach_prog();
+
+ fd = bpf_map__fd(ctx.bench->maps.trie_map);
+ fill_map(fd);
+}
+
+static void lpm_noop_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_NOOP;
+}
+
+static void lpm_baseline_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_BASELINE;
+}
+
+static void lpm_lookup_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_LOOKUP;
+}
+
+static void lpm_insert_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_INSERT;
+}
+
+static void lpm_update_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_UPDATE;
+}
+
+static void lpm_delete_setup(void)
+{
+ attach_prog_and_fill_map();
+ ctx.bench->bss->op = LPM_OP_DELETE;
+}
+
+static void lpm_free_setup(void)
+{
+ attach_prog();
+ ctx.bench->bss->op = LPM_OP_FREE;
+}
+
+static void lpm_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.bench->bss->hits, 0);
+ res->duration_ns = atomic_swap(&ctx.bench->bss->duration_ns, 0);
+}
+
+static void bench_reinit_map(void)
+{
+ int fd = bpf_map__fd(ctx.bench->maps.trie_map);
+
+ switch (ctx.bench->bss->op) {
+ case LPM_OP_INSERT:
+ /* trie_map needs to be emptied */
+ empty_map(fd);
+ break;
+ case LPM_OP_DELETE:
+ /* trie_map needs to be refilled */
+ fill_map(fd);
+ break;
+ default:
+ fprintf(stderr, "Unexpected REINIT return code for op %d\n",
+ ctx.bench->bss->op);
+ exit(1);
+ }
+}
+
+/* For NOOP, BASELINE, LOOKUP, INSERT, UPDATE, and DELETE */
+static void *lpm_producer(void *unused __always_unused)
+{
+ int err;
+ char in[ETH_HLEN]; /* unused */
+
+ LIBBPF_OPTS(bpf_test_run_opts, opts, .data_in = in,
+ .data_size_in = sizeof(in), .repeat = 1, );
+
+ while (true) {
+ int fd = bpf_program__fd(ctx.bench->progs.run_bench);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err) {
+ fprintf(stderr, "failed to run BPF prog: %d\n", err);
+ exit(1);
+ }
+
+ /* Check for kernel error code */
+ if ((int)opts.retval < 0) {
+ fprintf(stderr, "BPF prog returned error: %d\n",
+ opts.retval);
+ exit(1);
+ }
+
+ switch (opts.retval) {
+ case LPM_BENCH_SUCCESS:
+ break;
+ case LPM_BENCH_REINIT_MAP:
+ bench_reinit_map();
+ break;
+ default:
+ fprintf(stderr, "Unexpected BPF prog return code %d for op %d\n",
+ opts.retval, ctx.bench->bss->op);
+ exit(1);
+ }
+ }
+
+ return NULL;
+}
+
+static void *lpm_free_producer(void *unused __always_unused)
+{
+ while (true) {
+ struct lpm_trie_map *skel;
+
+ skel = lpm_trie_map__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ fill_map(bpf_map__fd(skel->maps.trie_free_map));
+ lpm_trie_map__destroy(skel);
+ }
+
+ return NULL;
+}
+
+/*
+ * The standard bench op_report_*() functions assume measurements are
+ * taken over a 1-second interval but operations that modify the map
+ * (INSERT, DELETE, and FREE) cannot run indefinitely without
+ * "resetting" the map to the initial state. Depending on the size of
+ * the map, this likely needs to happen before the 1-second timer fires.
+ *
+ * Calculate the fraction of a second over which the op measurement was
+ * taken (to ignore any time spent doing the reset) and report the
+ * throughput results per second.
+ */
+static void frac_second_report_progress(int iter, struct bench_res *res,
+ long delta_ns, double rate_divisor,
+ char rate)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / rate_divisor /
+ (res->duration_ns / (double)NSEC_PER_SEC);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter,
+ (delta_ns - NSEC_PER_SEC) / 1000.0);
+ printf("hits %8.3lf%c/s (%7.3lf%c/prod)\n", hits_per_sec, rate,
+ hits_per_prod, rate);
+}
+
+static void frac_second_report_final(struct bench_res res[], int res_cnt,
+ double lat_divisor, double rate_divisor,
+ char rate, const char *unit)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ double latency = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ double val = res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_mean += val / (0.0 + res_cnt);
+ latency += res[i].duration_ns / res[i].hits / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ double val =
+ res[i].hits / rate_divisor /
+ (res[i].duration_ns / (double)NSEC_PER_SEC);
+ hits_stddev += (hits_mean - val) * (hits_mean - val) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf %c ops/s (%7.3lf%c ops/prod), ",
+ hits_mean, hits_stddev, rate, hits_mean / env.producer_cnt,
+ rate);
+ printf("latency %8.3lf %s/op\n",
+ latency / lat_divisor / env.producer_cnt, unit);
+}
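
A quick sanity check of the normalization described above, using illustrative numbers rather than real results: suppose one INSERT iteration records hits = 2,000,000 and duration_ns = 400,000,000 before the map has to be reset. Then:

	hits_per_sec = 2,000,000 / (400,000,000 / 1e9) = 5.0 M ops/s
	latency      = 400,000,000 ns / 2,000,000 ops  = 200 ns/op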
+
+static void insert_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void delete_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000000.0;
+ char rate = 'M';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void free_ops_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double rate_divisor = 1000.0;
+ char rate = 'K';
+
+ frac_second_report_progress(iter, res, delta_ns, rate_divisor, rate);
+}
+
+static void insert_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void delete_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1.0;
+ double rate_divisor = 1000000.0;
+ const char *unit = "ns";
+ char rate = 'M';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+static void free_ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double lat_divisor = 1000000.0;
+ double rate_divisor = 1000.0;
+ const char *unit = "ms";
+ char rate = 'K';
+
+ frac_second_report_final(res, res_cnt, lat_divisor, rate_divisor, rate,
+ unit);
+}
+
+/* the noop bench measures harness overhead */
+const struct bench bench_lpm_trie_noop = {
+ .name = "lpm-trie-noop",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_noop_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* baseline overhead for lookup and update */
+const struct bench bench_lpm_trie_baseline = {
+ .name = "lpm-trie-baseline",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_baseline_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of doing a lookup on existing entries in a full trie */
+const struct bench bench_lpm_trie_lookup = {
+ .name = "lpm-trie-lookup",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_lookup_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of inserting new entries into an empty trie */
+const struct bench bench_lpm_trie_insert = {
+ .name = "lpm-trie-insert",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_insert_validate,
+ .setup = lpm_insert_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = insert_ops_report_progress,
+ .report_final = insert_ops_report_final,
+};
+
+/* measure cost of updating existing entries in a full trie */
+const struct bench bench_lpm_trie_update = {
+ .name = "lpm-trie-update",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = validate_common,
+ .setup = lpm_update_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
+
+/* measure cost of deleting existing entries from a full trie */
+const struct bench bench_lpm_trie_delete = {
+ .name = "lpm-trie-delete",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_delete_validate,
+ .setup = lpm_delete_setup,
+ .producer_thread = lpm_producer,
+ .measure = lpm_measure,
+ .report_progress = delete_ops_report_progress,
+ .report_final = delete_ops_report_final,
+};
+
+/* measure cost of freeing a full trie */
+const struct bench bench_lpm_trie_free = {
+ .name = "lpm-trie-free",
+ .argp = &bench_lpm_trie_map_argp,
+ .validate = lpm_free_validate,
+ .setup = lpm_free_setup,
+ .producer_thread = lpm_free_producer,
+ .measure = lpm_measure,
+ .report_progress = free_ops_report_progress,
+ .report_final = free_ops_report_final,
+};
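
For context, attach_prog() above fills each key's prefixlen and 32-bit data field. The struct trie_key it relies on comes from progs/lpm_trie.h, which is not part of this hunk; a plausible definition consistent with that usage (an assumption mirroring the usual struct bpf_lpm_trie_key layout) would be:

	struct trie_key {
		__u32 prefixlen;	/* number of significant bits in data */
		__u32 data;		/* key value; most-significant bits are matched first */
	};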
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index e1ee979e6acc..01bdce692799 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -19,6 +19,8 @@ static struct {
int ringbuf_sz; /* per-ringbuf, in bytes */
bool ringbuf_use_output; /* use slower output API */
int perfbuf_sz; /* per-CPU size, in pages */
+ bool overwrite;
+ bool bench_producer;
} args = {
.back2back = false,
.batch_cnt = 500,
@@ -27,6 +29,8 @@ static struct {
.ringbuf_sz = 512 * 1024,
.ringbuf_use_output = false,
.perfbuf_sz = 128,
+ .overwrite = false,
+ .bench_producer = false,
};
enum {
@@ -35,6 +39,8 @@ enum {
ARG_RB_BATCH_CNT = 2002,
ARG_RB_SAMPLED = 2003,
ARG_RB_SAMPLE_RATE = 2004,
+ ARG_RB_OVERWRITE = 2005,
+ ARG_RB_BENCH_PRODUCER = 2006,
};
static const struct argp_option opts[] = {
@@ -43,6 +49,8 @@ static const struct argp_option opts[] = {
{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+ { "rb-overwrite", ARG_RB_OVERWRITE, NULL, 0, "Overwrite mode"},
+ { "rb-bench-producer", ARG_RB_BENCH_PRODUCER, NULL, 0, "Benchmark producer"},
{},
};
@@ -72,6 +80,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case ARG_RB_OVERWRITE:
+ args.overwrite = true;
+ break;
+ case ARG_RB_BENCH_PRODUCER:
+ args.bench_producer = true;
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -95,8 +109,33 @@ static inline void bufs_trigger_batch(void)
static void bufs_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
+ if (args.bench_producer && strcmp(env.bench_name, "rb-libbpf")) {
+ fprintf(stderr, "--rb-bench-producer only works with rb-libbpf!\n");
+ exit(1);
+ }
+
+ if (args.overwrite && !args.bench_producer) {
+ fprintf(stderr, "overwrite mode only works with --rb-bench-producer for now!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && env.consumer_cnt != 0) {
+ fprintf(stderr, "no consumer is needed for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.back2back) {
+ fprintf(stderr, "back-to-back mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (args.bench_producer && args.sampled) {
+ fprintf(stderr, "sampling mode makes no sense for --rb-bench-producer!\n");
+ exit(1);
+ }
+
+ if (!args.bench_producer && env.consumer_cnt != 1) {
+ fprintf(stderr, "benchmarks without --rb-bench-producer require exactly one consumer!\n");
exit(1);
}
@@ -128,12 +167,17 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
- res->hits = atomic_swap(&buf_hits.value, 0);
+ if (args.bench_producer)
+ res->hits = atomic_swap(&ctx->skel->bss->hits, 0);
+ else
+ res->hits = atomic_swap(&buf_hits.value, 0);
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
+ __u32 flags;
+ struct bpf_map *ringbuf;
struct ringbuf_bench *skel;
setup_libbpf();
@@ -146,12 +190,19 @@ static struct ringbuf_bench *ringbuf_setup_skeleton(void)
skel->rodata->batch_cnt = args.batch_cnt;
skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+ skel->rodata->bench_producer = args.bench_producer;
if (args.sampled)
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
+ ringbuf = skel->maps.ringbuf;
+ if (args.overwrite) {
+ flags = bpf_map__map_flags(ringbuf) | BPF_F_RB_OVERWRITE;
+ bpf_map__set_map_flags(ringbuf, flags);
+ }
+
+ bpf_map__set_max_entries(ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
@@ -171,10 +222,12 @@ static void ringbuf_libbpf_setup(void)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
struct bpf_link *link;
+ int map_fd;
ctx->skel = ringbuf_setup_skeleton();
- ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
- buf_process_sample, NULL, NULL);
+
+ map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+ ctx->ringbuf = ring_buffer__new(map_fd, buf_process_sample, NULL, NULL);
if (!ctx->ringbuf) {
fprintf(stderr, "failed to create ringbuf\n");
exit(1);
diff --git a/tools/testing/selftests/bpf/benchs/bench_sockmap.c b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
new file mode 100644
index 000000000000..cfc072aa7fff
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_sockmap.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <error.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <sys/sendfile.h>
+#include <arpa/inet.h>
+#include <fcntl.h>
+#include <argp.h>
+#include "bench.h"
+#include "bench_sockmap_prog.skel.h"
+#include "bpf_util.h"
+
+#define FILE_SIZE (128 * 1024)
+#define DATA_REPEAT_SIZE 10
+
+static const char snd_data[DATA_REPEAT_SIZE] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+
+/* c1 <-> [p1, p2] <-> c2
+ * RX bench(BPF_SK_SKB_STREAM_VERDICT):
+ * ARG_FW_RX_PASS:
+ * send(p2) -> bpf skb passthrough on c2 -> recv(c2)
+ * ARG_FW_RX_VERDICT_EGRESS:
+ * send(c1) -> verdict skb to tx queue of p2 -> recv(c2)
+ * ARG_FW_RX_VERDICT_INGRESS:
+ * send(c1) -> verdict skb to rx queue of c2 -> recv(c2)
+ *
+ * TX bench(BPF_SK_MSG_VERDICT):
+ * ARG_FW_TX_PASS:
+ * send(p2) -> bpf msg passthrough -> send(p2) -> recv(c2)
+ * ARG_FW_TX_VERDICT_INGRESS:
+ * send(p2) -> verdict msg to rx queue of c2 -> recv(c2)
+ * ARG_FW_TX_VERDICT_EGRESS:
+ * send(p1) -> verdict msg to tx queue of p2 -> recv(c2)
+ */
+enum SOCKMAP_ARG_FLAG {
+ ARG_FW_RX_NORMAL = 11000,
+ ARG_FW_RX_PASS,
+ ARG_FW_RX_VERDICT_EGRESS,
+ ARG_FW_RX_VERDICT_INGRESS,
+ ARG_FW_TX_NORMAL,
+ ARG_FW_TX_PASS,
+ ARG_FW_TX_VERDICT_INGRESS,
+ ARG_FW_TX_VERDICT_EGRESS,
+ ARG_CTL_RX_STRP,
+ ARG_CONSUMER_DELAY_TIME,
+ ARG_PRODUCER_DURATION,
+};
+
+#define TXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_TX_NORMAL)
+
+#define TXMODE_BPF_INGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_INGRESS)
+
+#define TXMODE_BPF_EGRESS() \
+ ((ctx.mode) == ARG_FW_TX_VERDICT_EGRESS)
+
+#define TXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_TX_PASS)
+
+#define TXMODE_BPF() ( \
+ TXMODE_BPF_PASS() || \
+ TXMODE_BPF_INGRESS() || \
+ TXMODE_BPF_EGRESS())
+
+#define TXMODE() ( \
+ TXMODE_NORMAL() || \
+ TXMODE_BPF())
+
+#define RXMODE_NORMAL() \
+ ((ctx.mode) == ARG_FW_RX_NORMAL)
+
+#define RXMODE_BPF_PASS() \
+ ((ctx.mode) == ARG_FW_RX_PASS)
+
+#define RXMODE_BPF_VERDICT_EGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_EGRESS)
+
+#define RXMODE_BPF_VERDICT_INGRESS() \
+ ((ctx.mode) == ARG_FW_RX_VERDICT_INGRESS)
+
+#define RXMODE_BPF_VERDICT() ( \
+ RXMODE_BPF_VERDICT_INGRESS() || \
+ RXMODE_BPF_VERDICT_EGRESS())
+
+#define RXMODE_BPF() ( \
+ RXMODE_BPF_PASS() || \
+ RXMODE_BPF_VERDICT())
+
+#define RXMODE() ( \
+ RXMODE_NORMAL() || \
+ RXMODE_BPF())
+
+static struct sockmap_ctx {
+ struct bench_sockmap_prog *skel;
+ enum SOCKMAP_ARG_FLAG mode;
+ #define c1 fds[0]
+ #define p1 fds[1]
+ #define c2 fds[2]
+ #define p2 fds[3]
+ #define sfd fds[4]
+ int fds[5];
+ long send_calls;
+ long read_calls;
+ long prod_send;
+ long user_read;
+ int file_size;
+ int delay_consumer;
+ int prod_run_time;
+ int strp_size;
+} ctx = {
+ .prod_send = 0,
+ .user_read = 0,
+ .file_size = FILE_SIZE,
+ .mode = ARG_FW_RX_VERDICT_EGRESS,
+ .fds = {0},
+ .delay_consumer = 0,
+ .prod_run_time = 0,
+ .strp_size = 0,
+};
+
+static void bench_sockmap_prog_destroy(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(ctx.fds); i++) {
+ if (ctx.fds[i] > 0)
+ close(ctx.fds[i]);
+ }
+
+ bench_sockmap_prog__destroy(ctx.skel);
+}
+
+static void init_addr(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static bool set_non_block(int fd, bool nonblocking)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (flags == -1)
+ return false;
+ flags = nonblocking ? (flags | O_NONBLOCK) : (flags & ~O_NONBLOCK);
+ return (fcntl(fd, F_SETFL, flags) == 0);
+}
+
+static int create_pair(int *c, int *p, int type)
+{
+ struct sockaddr_storage addr;
+ int err, cfd, pfd;
+ socklen_t addr_len = sizeof(struct sockaddr_storage);
+
+ err = getsockname(ctx.sfd, (struct sockaddr *)&addr, &addr_len);
+ if (err) {
+ fprintf(stderr, "getsockname error %d\n", errno);
+ return err;
+ }
+ cfd = socket(AF_INET, type, 0);
+ if (cfd < 0) {
+ fprintf(stderr, "socket error %d\n", errno);
+ return cfd;
+ }
+
+ err = connect(cfd, (struct sockaddr *)&addr, addr_len);
+ if (err && errno != EINPROGRESS) {
+ fprintf(stderr, "connect error %d\n", errno);
+ return err;
+ }
+
+ pfd = accept(ctx.sfd, NULL, NULL);
+ if (pfd < 0) {
+ fprintf(stderr, "accept error %d\n", errno);
+ return pfd;
+ }
+ *c = cfd;
+ *p = pfd;
+ return 0;
+}
+
+static int create_sockets(void)
+{
+ struct sockaddr_storage addr;
+ int err, one = 1;
+ socklen_t addr_len;
+
+ init_addr(&addr, &addr_len);
+ ctx.sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (ctx.sfd < 0) {
+ fprintf(stderr, "socket error:%d\n", errno);
+ return ctx.sfd;
+ }
+ err = setsockopt(ctx.sfd, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err) {
+ fprintf(stderr, "setsockopt error:%d\n", errno);
+ return err;
+ }
+
+ err = bind(ctx.sfd, (struct sockaddr *)&addr, addr_len);
+ if (err) {
+ fprintf(stderr, "bind error:%d\n", errno);
+ return err;
+ }
+
+ err = listen(ctx.sfd, SOMAXCONN);
+ if (err) {
+ fprintf(stderr, "listen error:%d\n", errno);
+ return err;
+ }
+
+ err = create_pair(&ctx.c1, &ctx.p1, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 1 error\n");
+ return err;
+ }
+
+ err = create_pair(&ctx.c2, &ctx.p2, SOCK_STREAM);
+ if (err) {
+ fprintf(stderr, "create_pair 2 error\n");
+ return err;
+ }
+ printf("create socket fd c1:%d p1:%d c2:%d p2:%d\n",
+ ctx.c1, ctx.p1, ctx.c2, ctx.p2);
+ return 0;
+}
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 2 || env.producer_cnt != 1 ||
+ !env.affinity)
+ goto err;
+ return;
+err:
+ fprintf(stderr, "argument '-c 2 -p 1 -a' is necessary");
+ exit(1);
+}
+
+static int setup_rx_sockmap(void)
+{
+ int verdict, pass, parser, map;
+ int zero = 0, one = 1;
+ int err;
+
+ parser = bpf_program__fd(ctx.skel->progs.prog_skb_parser);
+ verdict = bpf_program__fd(ctx.skel->progs.prog_skb_verdict);
+ pass = bpf_program__fd(ctx.skel->progs.prog_skb_pass);
+ map = bpf_map__fd(ctx.skel->maps.sock_map_rx);
+
+ if (ctx.strp_size != 0) {
+ ctx.skel->bss->pkt_size = ctx.strp_size;
+ err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return err;
+ }
+
+ if (RXMODE_BPF_VERDICT())
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ else if (RXMODE_BPF_PASS())
+ err = bpf_prog_attach(pass, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (RXMODE_BPF_PASS())
+ return bpf_map_update_elem(map, &zero, &ctx.c2, BPF_NOEXIST);
+
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ if (err < 0)
+ return err;
+
+ if (RXMODE_BPF_VERDICT_INGRESS()) {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ } else {
+ err = bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ }
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static int setup_tx_sockmap(void)
+{
+ int zero = 0, one = 1;
+ int prog, map;
+ int err;
+
+ map = bpf_map__fd(ctx.skel->maps.sock_map_tx);
+ prog = TXMODE_BPF_PASS() ?
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_pass) :
+ bpf_program__fd(ctx.skel->progs.prog_skmsg_verdict);
+
+ err = bpf_prog_attach(prog, map, BPF_SK_MSG_VERDICT, 0);
+ if (err)
+ return err;
+
+ if (TXMODE_BPF_EGRESS()) {
+ err = bpf_map_update_elem(map, &zero, &ctx.p1, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.p2, BPF_NOEXIST);
+ } else {
+ ctx.skel->bss->verdict_dir = BPF_F_INGRESS;
+ err = bpf_map_update_elem(map, &zero, &ctx.p2, BPF_NOEXIST);
+ err |= bpf_map_update_elem(map, &one, &ctx.c2, BPF_NOEXIST);
+ }
+
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void setup(void)
+{
+ int err;
+
+ ctx.skel = bench_sockmap_prog__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ if (create_sockets()) {
+ fprintf(stderr, "create_net_mode error\n");
+ goto err;
+ }
+
+ if (RXMODE_BPF()) {
+ err = setup_rx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_rx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else if (TXMODE_BPF()) {
+ err = setup_tx_sockmap();
+ if (err) {
+ fprintf(stderr, "setup_tx_sockmap error:%d\n", err);
+ goto err;
+ }
+ } else {
+ fprintf(stderr, "unknown sockmap bench mode: %d\n", ctx.mode);
+ goto err;
+ }
+
+ return;
+
+err:
+ bench_sockmap_prog_destroy();
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->drops = atomic_swap(&ctx.prod_send, 0);
+ res->hits = atomic_swap(&ctx.skel->bss->process_byte, 0);
+ res->false_hits = atomic_swap(&ctx.user_read, 0);
+ res->important_hits = atomic_swap(&ctx.send_calls, 0);
+ res->important_hits |= atomic_swap(&ctx.read_calls, 0) << 32;
+}
+
+static void verify_data(int *check_pos, char *buf, int rcv)
+{
+ for (int i = 0 ; i < rcv; i++) {
+ if (buf[i] != snd_data[(*check_pos) % DATA_REPEAT_SIZE]) {
+ fprintf(stderr, "verify data fail");
+ exit(1);
+ }
+ (*check_pos)++;
+ if (*check_pos >= FILE_SIZE)
+ *check_pos = 0;
+ }
+}
+
+static void *consumer(void *input)
+{
+ int rcv, sent;
+ int check_pos = 0;
+ int tid = (long)input;
+ int recv_buf_size = FILE_SIZE;
+ char *buf = malloc(recv_buf_size);
+ int delay_read = ctx.delay_consumer;
+
+ if (!buf) {
+ fprintf(stderr, "fail to init read buffer");
+ return NULL;
+ }
+
+ while (true) {
+ if (tid == 1) {
+ /* consumer 1 is unused for tx test and stream verdict test */
+ if (RXMODE_BPF() || TXMODE())
+ return NULL;
+ /* only used for RX_NORMAL, which serves as a reverse-proxy mode */
+ rcv = read(ctx.p1, buf, recv_buf_size);
+ if (rcv < 0) {
+ fprintf(stderr, "fail to read p1");
+ return NULL;
+ }
+
+ sent = send(ctx.p2, buf, rcv, 0); /* forward only the bytes actually read */
+ if (sent < 0) {
+ fprintf(stderr, "fail to send p2");
+ return NULL;
+ }
+ } else {
+ if (delay_read != 0) {
+ if (delay_read < 0)
+ return NULL;
+ sleep(delay_read);
+ delay_read = 0;
+ }
+ /* read real endpoint by consumer 0 */
+ atomic_inc(&ctx.read_calls);
+ rcv = read(ctx.c2, buf, recv_buf_size);
+ if (rcv < 0 && errno != EAGAIN) {
+ fprintf(stderr, "%s fail to read c2 %d\n", __func__, errno);
+ return NULL;
+ }
+ verify_data(&check_pos, buf, rcv);
+ atomic_add(&ctx.user_read, rcv);
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ int off = 0, fp, need_sent, sent;
+ int file_size = ctx.file_size;
+ struct timespec ts1, ts2;
+ int target;
+ FILE *file;
+
+ file = tmpfile();
+ if (!file) {
+ fprintf(stderr, "create file for sendfile");
+ return NULL;
+ }
+
+ /* fill the file with a repeating pattern so the consumer can verify the data */
+ for (int i = 0; i < file_size; i++) {
+ if (fwrite(&snd_data[off], sizeof(char), 1, file) != 1) {
+ fprintf(stderr, "init tmpfile error");
+ return NULL;
+ }
+ if (++off >= sizeof(snd_data))
+ off = 0;
+ }
+ fflush(file);
+ fseek(file, 0, SEEK_SET);
+
+ fp = fileno(file);
+ need_sent = file_size;
+ clock_gettime(CLOCK_MONOTONIC, &ts1);
+
+ if (RXMODE_BPF_VERDICT())
+ target = ctx.c1;
+ else if (TXMODE_BPF_EGRESS())
+ target = ctx.p1;
+ else
+ target = ctx.p2;
+ set_non_block(target, true);
+ while (true) {
+ if (ctx.prod_run_time) {
+ clock_gettime(CLOCK_MONOTONIC, &ts2);
+ if (ts2.tv_sec - ts1.tv_sec > ctx.prod_run_time)
+ return NULL;
+ }
+
+ errno = 0;
+ atomic_inc(&ctx.send_calls);
+ sent = sendfile(target, fp, NULL, need_sent);
+ if (sent < 0) {
+ if (errno != EAGAIN && errno != ENOMEM && errno != ENOBUFS) {
+ fprintf(stderr, "sendfile return %d, errorno %d:%s\n",
+ sent, errno, strerror(errno));
+ return NULL;
+ }
+ continue;
+ } else if (sent < need_sent) {
+ need_sent -= sent;
+ atomic_add(&ctx.prod_send, sent);
+ continue;
+ }
+ atomic_add(&ctx.prod_send, need_sent);
+ need_sent = file_size;
+ lseek(fp, 0, SEEK_SET);
+ }
+
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double speed_mbs, prod_mbs, bpf_mbs, send_hz, read_hz;
+
+ prod_mbs = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+ speed_mbs = res->false_hits / 1000000.0 / (delta_ns / 1000000000.0);
+ bpf_mbs = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ send_hz = (res->important_hits & 0xFFFFFFFF) / (delta_ns / 1000000000.0);
+ read_hz = (res->important_hits >> 32) / (delta_ns / 1000000000.0);
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("Send Speed %8.3lf MB/s (%8.3lf calls/s), BPF Speed %8.3lf MB/s, "
+ "Rcv Speed %8.3lf MB/s (%8.3lf calls/s)\n",
+ prod_mbs, send_hz, bpf_mbs, speed_mbs, read_hz);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double verdict_mbs_mean = 0.0;
+ long verdict_total = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ verdict_mbs_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ verdict_total += res[i].hits / 1000000.0;
+ }
+
+ printf("Summary: total trans %8.3lu MB \u00B1 %5.3lf MB/s\n",
+ verdict_total, verdict_mbs_mean);
+}
+
+static const struct argp_option opts[] = {
+ { "rx-normal", ARG_FW_RX_NORMAL, NULL, 0,
+ "simple reserve-proxy mode, no bfp enabled"},
+ { "rx-pass", ARG_FW_RX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "rx-strp", ARG_CTL_RX_STRP, "Byte", 0,
+ "enable strparser and set the encapsulation size"},
+ { "rx-verdict-egress", ARG_FW_RX_VERDICT_EGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "rx-verdict-ingress", ARG_FW_RX_VERDICT_INGRESS, NULL, 0,
+ "forward data with bpf(stream verdict)"},
+ { "tx-normal", ARG_FW_TX_NORMAL, NULL, 0,
+ "simple c-s mode, no bfp enabled"},
+ { "tx-pass", ARG_FW_TX_PASS, NULL, 0,
+ "run bpf prog but no redir applied"},
+ { "tx-verdict-ingress", ARG_FW_TX_VERDICT_INGRESS, NULL, 0,
+ "forward msg to ingress queue of another socket"},
+ { "tx-verdict-egress", ARG_FW_TX_VERDICT_EGRESS, NULL, 0,
+ "forward msg to egress queue of another socket"},
+ { "delay-consumer", ARG_CONSUMER_DELAY_TIME, "SEC", 0,
+ "delay consumer start"},
+ { "producer-duration", ARG_PRODUCER_DURATION, "SEC", 0,
+ "producer duration"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_FW_RX_NORMAL...ARG_FW_TX_VERDICT_EGRESS:
+ ctx.mode = key;
+ break;
+ case ARG_CONSUMER_DELAY_TIME:
+ ctx.delay_consumer = strtol(arg, NULL, 10);
+ break;
+ case ARG_PRODUCER_DURATION:
+ ctx.prod_run_time = strtol(arg, NULL, 10);
+ break;
+ case ARG_CTL_RX_STRP:
+ ctx.strp_size = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_sockmap_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Benchmark sockmap/sk_msg data forwarding performance */
+const struct bench bench_sockmap = {
+ .name = "sockmap",
+ .argp = &bench_sockmap_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .consumer_thread = consumer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 32e9f194d449..34018fc3927f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -180,10 +180,10 @@ static void trigger_kernel_count_setup(void)
{
setup_ctx();
bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
- bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_kernel_count, true);
load_ctx();
/* override driver program */
- ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_kernel_count);
}
static void trigger_kprobe_setup(void)
@@ -226,6 +226,65 @@ static void trigger_fentry_setup(void)
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
+static void attach_ksyms_all(struct bpf_program *empty, bool kretprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ /* Some recursive functions will be skipped in
+ * bpf_get_ksyms -> skip_entry, as they can introduce significant
+ * overhead. However, it's difficult to skip all the recursive
+ * functions for a debug kernel.
+ *
+ * So, don't run the kprobe-multi-all and kretprobe-multi-all on
+ * a debug kernel.
+ */
+ if (bpf_get_ksyms(&syms, &cnt, true)) {
+ fprintf(stderr, "failed to get ksyms\n");
+ exit(1);
+ }
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+ opts.retprobe = kretprobe;
+ /* attach empty to all the kernel functions except bpf_get_numa_node_id. */
+ if (!bpf_program__attach_kprobe_multi_opts(empty, NULL, &opts)) {
+ fprintf(stderr, "failed to attach bpf_program__attach_kprobe_multi_opts to all\n");
+ exit(1);
+ }
+}
+
+static void trigger_kprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, false);
+ attach_bpf(prog);
+}
+
+static void trigger_kretprobe_multi_all_setup(void)
+{
+ struct bpf_program *prog, *empty;
+
+ setup_ctx();
+ empty = ctx.skel->progs.bench_kretprobe_multi_empty;
+ prog = ctx.skel->progs.bench_trigger_kretprobe_multi;
+ bpf_program__set_autoload(empty, true);
+ bpf_program__set_autoload(prog, true);
+ load_ctx();
+
+ attach_ksyms_all(empty, true);
+ attach_bpf(prog);
+}
+
static void trigger_fexit_setup(void)
{
setup_ctx();
@@ -333,6 +392,20 @@ static void *uprobe_producer_ret(void *input)
return NULL;
}
+#ifdef __x86_64__
+__nocf_check __weak void uprobe_target_nop5(void)
+{
+ asm volatile (".byte 0x0f, 0x1f, 0x44, 0x00, 0x00");
+}
+
+static void *uprobe_producer_nop5(void *input)
+{
+ while (true)
+ uprobe_target_nop5();
+ return NULL;
+}
+#endif
+
static void usetup(bool use_retprobe, bool use_multi, void *target_addr)
{
size_t uprobe_offset;
@@ -448,6 +521,28 @@ static void uretprobe_multi_ret_setup(void)
usetup(true, true /* use_multi */, &uprobe_target_ret);
}
+#ifdef __x86_64__
+static void uprobe_nop5_setup(void)
+{
+ usetup(false, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_nop5_setup(void)
+{
+ usetup(true, false /* !use_multi */, &uprobe_target_nop5);
+}
+
+static void uprobe_multi_nop5_setup(void)
+{
+ usetup(false, true /* use_multi */, &uprobe_target_nop5);
+}
+
+static void uretprobe_multi_nop5_setup(void)
+{
+ usetup(true, true /* use_multi */, &uprobe_target_nop5);
+}
+#endif
+
const struct bench bench_trig_syscall_count = {
.name = "trig-syscall-count",
.validate = trigger_validate,
@@ -476,6 +571,8 @@ BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(kprobe_multi_all, "kprobe-multi-all");
+BENCH_TRIG_KERNEL(kretprobe_multi_all, "kretprobe-multi-all");
BENCH_TRIG_KERNEL(fexit, "fexit");
BENCH_TRIG_KERNEL(fmodret, "fmodret");
BENCH_TRIG_KERNEL(tp, "tp");
@@ -506,3 +603,9 @@ BENCH_TRIG_USERMODE(uprobe_multi_ret, ret, "uprobe-multi-ret");
BENCH_TRIG_USERMODE(uretprobe_multi_nop, nop, "uretprobe-multi-nop");
BENCH_TRIG_USERMODE(uretprobe_multi_push, push, "uretprobe-multi-push");
BENCH_TRIG_USERMODE(uretprobe_multi_ret, ret, "uretprobe-multi-ret");
+#ifdef __x86_64__
+BENCH_TRIG_USERMODE(uprobe_nop5, nop5, "uprobe-nop5");
+BENCH_TRIG_USERMODE(uretprobe_nop5, nop5, "uretprobe-nop5");
+BENCH_TRIG_USERMODE(uprobe_multi_nop5, nop5, "uprobe-multi-nop5");
+BENCH_TRIG_USERMODE(uretprobe_multi_nop5, nop5, "uretprobe-multi-nop5");
+#endif
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index 91e3567962ff..83e05e837871 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -49,3 +49,7 @@ for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
+header "Ringbuf, multi-producer contention in overwrite mode, no consumer"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+ summarize "rb-prod nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 --rb-overwrite --rb-bench-producer rb-libbpf)"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index a690f5a68b6b..f7573708a0c3 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -6,8 +6,8 @@ def_tests=( \
usermode-count kernel-count syscall-count \
fentry fexit fmodret \
rawtp tp \
- kprobe kprobe-multi \
- kretprobe kretprobe-multi \
+ kprobe kprobe-multi kprobe-multi-all \
+ kretprobe kretprobe-multi kretprobe-multi-all \
)
tests=("$@")
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index af169f831f2f..03f55405484b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret,nop5}
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index 68a51dcc0669..16f8ce832004 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -46,8 +46,11 @@
void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
int node_id, __u64 flags) __ksym __weak;
+int bpf_arena_reserve_pages(void *map, void __arena *addr, __u32 page_cnt) __ksym __weak;
void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+#define arena_base(map) ((void __arena *)((struct bpf_arena *)(map))->user_vm_start)
+
#else /* when compiled as user space code */
#define __arena
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
index 85dbc3ea4da5..e16fa7d95fcf 100644
--- a/tools/testing/selftests/bpf/bpf_arena_list.h
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -64,14 +64,12 @@ static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
static inline void __list_del(arena_list_node_t *n)
{
- arena_list_node_t *next = n->next, *tmp;
+ arena_list_node_t *next = n->next;
arena_list_node_t * __arena *pprev = n->pprev;
cast_user(next);
cast_kern(pprev);
- tmp = *pprev;
- cast_kern(tmp);
- WRITE_ONCE(tmp, next);
+ WRITE_ONCE(*pprev, next);
if (next) {
cast_user(pprev);
cast_kern(next);
diff --git a/tools/testing/selftests/bpf/bpf_arena_strsearch.h b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
new file mode 100644
index 000000000000..c1b6eaa905bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_strsearch.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+__noinline int bpf_arena_strlen(const char __arena *s __arg_arena)
+{
+ const char __arena *sc;
+
+ for (sc = s; *sc != '\0'; ++sc)
+ cond_break;
+ return sc - s;
+}
+
+/**
+ * glob_match - Shell-style pattern matching, like !fnmatch(pat, str, 0)
+ * @pat: Shell-style pattern to match, e.g. "*.[ch]".
+ * @str: String to match. The pattern must match the entire string.
+ *
+ * Perform shell-style glob matching, returning true (1) if the match
+ * succeeds, or false (0) if it fails. Equivalent to !fnmatch(@pat, @str, 0).
+ *
+ * Pattern metacharacters are ?, *, [ and \.
+ * (And, inside character classes, !, - and ].)
+ *
+ * This is a small and simple implementation intended for device blacklists
+ * where a string is matched against a number of patterns. Thus, it
+ * does not preprocess the patterns. It is non-recursive, and run-time
+ * is at most quadratic: strlen(@str)*strlen(@pat).
+ *
+ * An example of the worst case is glob_match("*aaaaa", "aaaaaaaaaa");
+ * it takes 6 passes over the pattern before matching the string.
+ *
+ * Like !fnmatch(@pat, @str, 0) and unlike the shell, this does NOT
+ * treat / or leading . specially; it isn't actually used for pathnames.
+ *
+ * Note that according to glob(7) (and unlike bash), character classes
+ * are complemented by a leading !; this does not support the regex-style
+ * [^a-z] syntax.
+ *
+ * An opening bracket without a matching close is matched literally.
+ */
+__noinline bool glob_match(char const __arena *pat __arg_arena, char const __arena *str __arg_arena)
+{
+ /*
+ * Backtrack to previous * on mismatch and retry starting one
+ * character later in the string. Because * matches all characters
+ * (no exception for /), it can be easily proved that there's
+ * never a need to backtrack multiple levels.
+ */
+ char const __arena *back_pat = NULL, *back_str;
+
+ /*
+ * Loop over each token (character or class) in pat, matching
+ * it against the remaining unmatched tail of str. Return false
+ * on mismatch, or true after matching the trailing nul bytes.
+ */
+ for (;;) {
+ unsigned char c = *str++;
+ unsigned char d = *pat++;
+
+ switch (d) {
+ case '?': /* Wildcard: anything but nul */
+ if (c == '\0')
+ return false;
+ break;
+ case '*': /* Any-length wildcard */
+ if (*pat == '\0') /* Optimize trailing * case */
+ return true;
+ back_pat = pat;
+ back_str = --str; /* Allow zero-length match */
+ break;
+ case '[': { /* Character class */
+ bool match = false, inverted = (*pat == '!');
+ char const __arena *class = pat + inverted;
+ unsigned char a = *class++;
+
+ /*
+ * Iterate over each span in the character class.
+ * A span is either a single character a, or a
+ * range a-b. The first span may begin with ']'.
+ */
+ do {
+ unsigned char b = a;
+
+ if (a == '\0') /* Malformed */
+ goto literal;
+
+ if (class[0] == '-' && class[1] != ']') {
+ b = class[1];
+
+ if (b == '\0')
+ goto literal;
+
+ class += 2;
+ /* Any special action if a > b? */
+ }
+ match |= (a <= c && c <= b);
+ cond_break;
+ } while ((a = *class++) != ']');
+
+ if (match == inverted)
+ goto backtrack;
+ pat = class;
+ }
+ break;
+ case '\\':
+ d = *pat++;
+ __attribute__((__fallthrough__));
+ default: /* Literal character */
+literal:
+ if (c == d) {
+ if (d == '\0')
+ return true;
+ break;
+ }
+backtrack:
+ if (c == '\0' || !back_pat)
+ return false; /* No point continuing */
+ /* Try again from last *, one character later in str. */
+ pat = back_pat;
+ str = ++back_str;
+ break;
+ }
+ cond_break;
+ }
+ return false;
+}
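
A minimal usage sketch for the two arena helpers above, assuming both strings already live in the arena; the function and variable names are illustrative and not part of this header:

	static bool example_match(const char __arena *pat, const char __arena *name)
	{
		/* reject empty names, then apply shell-style matching */
		if (bpf_arena_strlen(name) == 0)
			return false;
		/* e.g. pat = "*.[ch]" matches name = "glob.c" */
		return glob_match(pat, name);
	}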
diff --git a/tools/testing/selftests/bpf/bpf_atomic.h b/tools/testing/selftests/bpf/bpf_atomic.h
new file mode 100644
index 000000000000..c550e5711967
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_atomic.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ATOMIC_H
+#define BPF_ATOMIC_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+extern bool CONFIG_X86_64 __kconfig __weak;
+
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged.
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type) \
+ unsigned type : (unsigned type)0, signed type : (signed type)0
+
+#define __unqual_typeof(x) \
+ typeof(_Generic((x), \
+ char: (char)0, \
+ __scalar_type_to_expr_cases(char), \
+ __scalar_type_to_expr_cases(short), \
+ __scalar_type_to_expr_cases(int), \
+ __scalar_type_to_expr_cases(long), \
+ __scalar_type_to_expr_cases(long long), \
+ default: (typeof(x))0))
+
+/* No-op for BPF */
+#define cpu_relax() ({})
+
+#define READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *)&(x)) = (val))
+
+#define cmpxchg(p, old, new) __sync_val_compare_and_swap((p), old, new)
+
+#define try_cmpxchg(p, pold, new) \
+ ({ \
+ __unqual_typeof(*(pold)) __o = *(pold); \
+ __unqual_typeof(*(p)) __r = cmpxchg(p, __o, new); \
+ if (__r != __o) \
+ *(pold) = __r; \
+ __r == __o; \
+ })
+
+#define try_cmpxchg_relaxed(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define try_cmpxchg_acquire(p, pold, new) try_cmpxchg(p, pold, new)
+
+#define smp_mb() \
+ ({ \
+ volatile unsigned long __val; \
+ __sync_fetch_and_add(&__val, 0); \
+ })
+
+#define smp_rmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+#define smp_wmb() \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ else \
+ barrier(); \
+ })
+
+/* Control dependency provides LOAD->STORE, provide LOAD->LOAD */
+#define smp_acquire__after_ctrl_dep() ({ smp_rmb(); })
+
+#define smp_load_acquire(p) \
+ ({ \
+ __unqual_typeof(*(p)) __v = READ_ONCE(*(p)); \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ __v; \
+ })
+
+#define smp_store_release(p, val) \
+ ({ \
+ if (!CONFIG_X86_64) \
+ smp_mb(); \
+ barrier(); \
+ WRITE_ONCE(*(p), val); \
+ })
+
+#define smp_cond_load_relaxed_label(p, cond_expr, label) \
+ ({ \
+ typeof(p) __ptr = (p); \
+ __unqual_typeof(*(p)) VAL; \
+ for (;;) { \
+ VAL = (__unqual_typeof(*(p)))READ_ONCE(*__ptr); \
+ if (cond_expr) \
+ break; \
+ cond_break_label(label); \
+ cpu_relax(); \
+ } \
+ (typeof(*(p)))VAL; \
+ })
+
+#define smp_cond_load_acquire_label(p, cond_expr, label) \
+ ({ \
+ __unqual_typeof(*p) __val = \
+ smp_cond_load_relaxed_label(p, cond_expr, label); \
+ smp_acquire__after_ctrl_dep(); \
+ (typeof(*(p)))__val; \
+ })
+
+#define atomic_read(p) READ_ONCE((p)->counter)
+
+#define atomic_cond_read_relaxed_label(p, cond_expr, label) \
+ smp_cond_load_relaxed_label(&(p)->counter, cond_expr, label)
+
+#define atomic_cond_read_acquire_label(p, cond_expr, label) \
+ smp_cond_load_acquire_label(&(p)->counter, cond_expr, label)
+
+#define atomic_try_cmpxchg_relaxed(p, pold, new) \
+ try_cmpxchg_relaxed(&(p)->counter, pold, new)
+
+#define atomic_try_cmpxchg_acquire(p, pold, new) \
+ try_cmpxchg_acquire(&(p)->counter, pold, new)
+
+#endif /* BPF_ATOMIC_H */
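
As a rough illustration of how these primitives compose in a BPF program, the sketch below (not part of the patch) spins on a flag with smp_cond_load_acquire_label() and then claims it with try_cmpxchg_acquire(). The function name, the 1 -> 2 flag protocol, and the error return values are assumptions for illustration only.

/* Hypothetical sketch: wait until *flag != 0, then claim it (1 -> 2).
 * cond_break_label() inside the cond-load keeps the loop verifier-bounded.
 */
static int claim_flag(int *flag)
{
	int old = 1;

	smp_cond_load_acquire_label(flag, VAL != 0, out);
	if (!try_cmpxchg_acquire(flag, &old, 2))
		return -1;	/* someone else claimed it first */
	return 0;
out:
	return -1;		/* may_goto budget exhausted */
}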
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index cd8ecd39c3f3..2cd9165c7348 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -219,7 +219,7 @@ extern void bpf_put_file(struct file *file) __ksym;
* including the NULL termination character, stored in the supplied
* buffer. On error, a negative integer is returned.
*/
-extern int bpf_path_d_path(struct path *path, char *buf, size_t buf__sz) __ksym;
+extern int bpf_path_d_path(const struct path *path, char *buf, size_t buf__sz) __ksym;
/* This macro must be used to mark the exception callback corresponding to the
* main program. For example:
@@ -368,12 +368,12 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("may_goto %l[l_break]" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -392,7 +392,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -400,7 +400,7 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#else
@@ -418,7 +418,7 @@ l_true: \
ret; \
})
-#define cond_break \
+#define __cond_break(expr) \
({ __label__ l_break, l_continue; \
asm volatile goto("1:.byte 0xe5; \
.byte 0; \
@@ -426,12 +426,15 @@ l_true: \
.short 0" \
:::: l_break); \
goto l_continue; \
- l_break: break; \
+ l_break: expr; \
l_continue:; \
})
#endif
#endif
+#define cond_break __cond_break(break)
+#define cond_break_label(label) __cond_break(goto label)
+
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
@@ -588,4 +591,66 @@ extern int bpf_iter_kmem_cache_new(struct bpf_iter_kmem_cache *it) __weak __ksym
extern struct kmem_cache *bpf_iter_kmem_cache_next(struct bpf_iter_kmem_cache *it) __weak __ksym;
extern void bpf_iter_kmem_cache_destroy(struct bpf_iter_kmem_cache *it) __weak __ksym;
+struct bpf_iter_dmabuf;
+extern int bpf_iter_dmabuf_new(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern struct dma_buf *bpf_iter_dmabuf_next(struct bpf_iter_dmabuf *it) __weak __ksym;
+extern void bpf_iter_dmabuf_destroy(struct bpf_iter_dmabuf *it) __weak __ksym;
+
+extern int bpf_cgroup_read_xattr(struct cgroup *cgroup, const char *name__str,
+ struct bpf_dynptr *value_p) __weak __ksym;
+
+#define PREEMPT_BITS 8
+#define SOFTIRQ_BITS 8
+#define HARDIRQ_BITS 4
+#define NMI_BITS 4
+
+#define PREEMPT_SHIFT 0
+#define SOFTIRQ_SHIFT (PREEMPT_SHIFT + PREEMPT_BITS)
+#define HARDIRQ_SHIFT (SOFTIRQ_SHIFT + SOFTIRQ_BITS)
+#define NMI_SHIFT (HARDIRQ_SHIFT + HARDIRQ_BITS)
+
+#define __IRQ_MASK(x) ((1UL << (x))-1)
+
+#define SOFTIRQ_MASK (__IRQ_MASK(SOFTIRQ_BITS) << SOFTIRQ_SHIFT)
+#define HARDIRQ_MASK (__IRQ_MASK(HARDIRQ_BITS) << HARDIRQ_SHIFT)
+#define NMI_MASK (__IRQ_MASK(NMI_BITS) << NMI_SHIFT)
+
+extern bool CONFIG_PREEMPT_RT __kconfig __weak;
+#ifdef bpf_target_x86
+extern const int __preempt_count __ksym;
+#endif
+
+struct task_struct___preempt_rt {
+ int softirq_disable_cnt;
+} __attribute__((preserve_access_index));
+
+static inline int get_preempt_count(void)
+{
+#if defined(bpf_target_x86)
+ return *(int *) bpf_this_cpu_ptr(&__preempt_count);
+#elif defined(bpf_target_arm64)
+ return bpf_get_current_task_btf()->thread_info.preempt.count;
+#endif
+ return 0;
+}
+
+/* Description
+ * Report whether execution is currently in interrupt context. Only works on the following archs:
+ * * x86
+ * * arm64
+ */
+static inline int bpf_in_interrupt(void)
+{
+ struct task_struct___preempt_rt *tsk;
+ int pcnt;
+
+ pcnt = get_preempt_count();
+ if (!CONFIG_PREEMPT_RT)
+ return pcnt & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_MASK);
+
+ tsk = (void *) bpf_get_current_task_btf();
+ return (pcnt & (NMI_MASK | HARDIRQ_MASK)) |
+ (tsk->softirq_disable_cnt & SOFTIRQ_MASK);
+}
+
#endif
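
The sketch below (not part of the patch) shows how the two additions above might be used together: cond_break_label() gives a bounded loop an explicit exit label, and bpf_in_interrupt() gates work that should not run from IRQ context. The section name, the kprobe target, and the use of bpf_printk() are illustrative assumptions (bpf_helpers.h is assumed to be included).

/* Hypothetical sketch only. */
SEC("kprobe/do_nanosleep")
int count_nonirq(void *ctx)
{
	int i, sum = 0;

	/* Skip contexts where the heavier work below is not wanted. */
	if (bpf_in_interrupt())
		return 0;

	for (i = 0; i < 10000; i++) {
		sum += i;
		/* Jump out when the may_goto budget is exhausted instead of
		 * letting the verifier reject the loop.
		 */
		cond_break_label(done);
	}
done:
	bpf_printk("sum=%d", sum);
	return 0;
}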
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 2eb3483f2fb0..e0189254bb6e 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -19,14 +19,17 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym __weak;
+extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
/* Description
* Obtain a read-only pointer to the dynptr's data
* Returns
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u64 offset,
+ void *buffer, __u64 buffer__szk) __ksym __weak;
/* Description
* Obtain a read-write pointer to the dynptr's data
@@ -34,13 +37,13 @@ extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
* Either a direct pointer to the dynptr data or a pointer to the user-provided
* buffer if unable to obtain a direct pointer
*/
-extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
- void *buffer, __u32 buffer__szk) __ksym __weak;
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u64 offset, void *buffer,
+ __u64 buffer__szk) __ksym __weak;
-extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u64 start, __u64 end) __ksym __weak;
extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
-extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u64 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
/* Description
@@ -69,7 +72,7 @@ extern int bpf_get_file_xattr(struct file *file, const char *name,
struct bpf_dynptr *value_ptr) __ksym;
extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
@@ -87,4 +90,9 @@ struct dentry;
*/
extern int bpf_get_dentry_xattr(struct dentry *dentry, const char *name,
struct bpf_dynptr *value_ptr) __ksym __weak;
+
+extern int bpf_set_dentry_xattr(struct dentry *dentry, const char *name__str,
+ const struct bpf_dynptr *value_p, int flags) __ksym __weak;
+extern int bpf_remove_dentry_xattr(struct dentry *dentry, const char *name__str) __ksym __weak;
+
#endif
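
A hedged sketch (not part of the patch) of the widened dynptr slice API: offsets and buffer sizes are now __u64. The program type, section name, and includes are assumptions for illustration.

/* Hypothetical sketch: peek at the first 4 bytes of an skb via a dynptr. */
SEC("tc")
int peek_payload(struct __sk_buff *skb)
{
	struct bpf_dynptr ptr;
	__u8 buf[4];
	void *data;

	if (bpf_dynptr_from_skb(skb, 0, &ptr))
		return 0;

	/* offset and buffer__szk are __u64 after this change */
	data = bpf_dynptr_slice(&ptr, 0, buf, sizeof(buf));
	if (!data)
		return 0;

	/* data points either into the skb or into buf */
	return 0;
}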
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile
deleted file mode 100644
index 40b25b98ad1b..000000000000
--- a/tools/testing/selftests/bpf/bpf_test_modorder_x/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_test_modorder_x.ko
-
-obj-m += bpf_test_modorder_x.o
-
-all:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile b/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile
deleted file mode 100644
index 52c3ab9d84e2..000000000000
--- a/tools/testing/selftests/bpf/bpf_test_modorder_y/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_test_modorder_y.ko
-
-obj-m += bpf_test_modorder_y.o
-
-all:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
deleted file mode 100644
index ed5143b79edf..000000000000
--- a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
+++ /dev/null
@@ -1,19 +0,0 @@
-BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_test_no_cfi.ko
-
-obj-m += bpf_test_no_cfi.o
-
-all:
- +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules
-
-clean:
- +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile
deleted file mode 100644
index 15cb36c4483a..000000000000
--- a/tools/testing/selftests/bpf/bpf_testmod/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
-KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
-
-ifeq ($(V),1)
-Q =
-else
-Q = @
-endif
-
-MODULES = bpf_testmod.ko
-
-obj-m += bpf_testmod.o
-CFLAGS_bpf_testmod.o = -I$(src)
-
-all:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
-
-clean:
- +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
-
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index 5f6963a320d7..4bc2d25f33e1 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -67,6 +67,9 @@ static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
#define sys_gettid() syscall(SYS_gettid)
#endif
+/* Poison gettid so that direct usage does not creep back in. */
+#pragma GCC poison gettid
+
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
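
With gettid poisoned, call sites must go through the sys_gettid() wrapper defined above. A hedged one-liner (not part of the patch); the helper name is an assumption and <sys/types.h> is assumed to be included.

/* Hypothetical sketch: fetch the thread id via the wrapper. */
static inline pid_t current_tid(void)
{
	return sys_gettid();
}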
diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
index d5ac507401d7..98f840c3a38f 100644
--- a/tools/testing/selftests/bpf/cap_helpers.c
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -19,7 +19,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -32,7 +32,7 @@ int cap_enable_effective(__u64 caps, __u64 *old_caps)
data[1].effective |= cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
@@ -49,7 +49,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
err = capget(&hdr, data);
if (err)
- return err;
+ return -errno;
if (old_caps)
*old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
@@ -61,7 +61,7 @@ int cap_disable_effective(__u64 caps, __u64 *old_caps)
data[1].effective &= ~cap1;
err = capset(&hdr, data);
if (err)
- return err;
+ return -errno;
return 0;
}
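
With the change above, callers receive -errno rather than a bare error code, so a test can report exactly why capget()/capset() failed. The sketch below (not part of the patch) is illustrative only; the function name is an assumption and <stdio.h>, <string.h> and cap_helpers.h are assumed to be included.

/* Hypothetical caller sketch. */
static int drop_net_admin(__u64 *saved)
{
	int err = cap_disable_effective(1ULL << CAP_NET_ADMIN, saved);

	if (err)
		fprintf(stderr, "cap_disable_effective: %s\n", strerror(-err));
	return err;
}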
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
index 6d163530cb0f..8dcb28557f76 100644
--- a/tools/testing/selftests/bpf/cap_helpers.h
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -4,6 +4,7 @@
#include <linux/types.h>
#include <linux/capability.h>
+#include <errno.h>
#ifndef CAP_PERFMON
#define CAP_PERFMON 38
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index e4535451322e..20cede4db3ce 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -4,6 +4,7 @@
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
+#include <sys/xattr.h>
#include <linux/limits.h>
#include <stdio.h>
#include <stdlib.h>
@@ -319,6 +320,26 @@ int join_parent_cgroup(const char *relative_path)
}
/**
+ * set_cgroup_xattr() - Set xattr on a cgroup dir
+ * @relative_path: The cgroup path, relative to the workdir, to set xattr
+ * @name: xattr name
+ * @value: xattr value
+ *
+ * This function sets an xattr on the cgroup directory.
+ *
+ * On success, it returns 0; on failure it returns -1.
+ */
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return setxattr(cgroup_path, name, value, strlen(value) + 1, 0);
+}
+
+/**
* __cleanup_cgroup_environment() - Delete temporary cgroups
*
* This is a helper for cleanup_cgroup_environment() that is responsible for
@@ -391,6 +412,26 @@ void remove_cgroup(const char *relative_path)
log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
+/*
+ * remove_cgroup_pid() - Remove a cgroup created by the process identified by @pid
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ * @pid: PID to be used to find cgroup_path
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects that the cgroup has no children or live
+ * processes, and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup_pid(const char *relative_path, int pid)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path_pid(cgroup_path, relative_path, pid);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
+}
+
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
* @relative_path: The cgroup path, relative to the workdir, to join
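
A hedged usage sketch (not part of the patch) for the new xattr helper: create a cgroup, tag it with an xattr, and clean up. The cgroup path and xattr name are illustrative assumptions; cgroup_helpers.h, <stdio.h> and <unistd.h> are assumed to be included.

/* Hypothetical test sketch. */
static void cgroup_xattr_demo(void)
{
	int cg_fd;

	if (setup_cgroup_environment())
		return;

	cg_fd = create_and_get_cgroup("/xattr_test");
	if (cg_fd < 0)
		goto out;

	if (set_cgroup_xattr("/xattr_test", "user.bpf_test", "ok"))
		fprintf(stderr, "set_cgroup_xattr failed\n");

	close(cg_fd);
	remove_cgroup("/xattr_test");
out:
	cleanup_cgroup_environment();
}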
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 502845160d88..3857304be874 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -19,6 +19,7 @@ int cgroup_setup_and_join(const char *relative_path);
int get_root_cgroup(void);
int create_and_get_cgroup(const char *relative_path);
void remove_cgroup(const char *relative_path);
+void remove_cgroup_pid(const char *relative_path, int pid);
unsigned long long get_cgroup_id(const char *relative_path);
int get_cgroup1_hierarchy_id(const char *subsys_name);
@@ -26,6 +27,10 @@ int join_cgroup(const char *relative_path);
int join_root_cgroup(void);
int join_parent_cgroup(const char *relative_path);
+int set_cgroup_xattr(const char *relative_path,
+ const char *name,
+ const char *value);
+
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 4ca84c8d9116..558839e3c185 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -22,6 +22,8 @@ CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DMABUF_HEAPS=y
+CONFIG_DMABUF_HEAPS_SYSTEM=y
CONFIG_DUMMY=y
CONFIG_DYNAMIC_FTRACE=y
CONFIG_FPROBE=y
@@ -48,6 +50,7 @@ CONFIG_IPV6_SIT=y
CONFIG_IPV6_TUNNEL=y
CONFIG_KEYS=y
CONFIG_LIRC=y
+CONFIG_LIVEPATCH=y
CONFIG_LWTUNNEL=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SRCVERSION_ALL=y
@@ -58,6 +61,8 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_ACT_MIRRED=y
CONFIG_NET_ACT_SKBMOD=y
CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
@@ -70,8 +75,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NET_IPIP=y
CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
CONFIG_NET_SCH_FQ=y
CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=y
CONFIG_NETFILTER=y
@@ -92,6 +99,9 @@ CONFIG_NF_TABLES_NETDEV=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NF_TABLES_IPV6=y
CONFIG_NETFILTER_INGRESS=y
+CONFIG_IP_NF_IPTABLES_LEGACY=y
+CONFIG_IP6_NF_IPTABLES_LEGACY=y
+CONFIG_NETFILTER_XTABLES_LEGACY=y
CONFIG_NF_FLOW_TABLE=y
CONFIG_NF_FLOW_TABLE_INET=y
CONFIG_NETFILTER_NETLINK=y
@@ -100,11 +110,15 @@ CONFIG_IP_NF_IPTABLES=y
CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
+CONFIG_PACKET=y
CONFIG_RC_CORE=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLE_LIVEPATCH=m
CONFIG_SECURITY=y
CONFIG_SECURITYFS=y
CONFIG_SYN_COOKIES=y
CONFIG_TEST_BPF=m
+CONFIG_UDMABUF=y
CONFIG_USERFAULTFD=y
CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
@@ -112,3 +126,8 @@ CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
CONFIG_TCP_CONG_DCTCP=y
CONFIG_TCP_CONG_BBR=y
+CONFIG_INFINIBAND=y
+CONFIG_SMC=y
+CONFIG_SMC_HS_CTRL_BPF=y
+CONFIG_DIBS=y
+CONFIG_DIBS_LO=y
\ No newline at end of file
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
index 3720b7611523..7efad36ceb26 100644
--- a/tools/testing/selftests/bpf/config.aarch64
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -31,10 +31,7 @@ CONFIG_COMPAT=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_INFO_REDUCED=n
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
@@ -46,7 +43,6 @@ CONFIG_DETECT_HUNG_TASK=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_DEVTMPFS=y
CONFIG_DRM=y
-CONFIG_DUMMY=y
CONFIG_EXPERT=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
@@ -70,13 +66,11 @@ CONFIG_HZ_100=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
-CONFIG_INET_ESP=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -97,22 +91,18 @@ CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NET=y
-CONFIG_NF_TABLES=y
CONFIG_NLMON=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
CONFIG_OVERLAY_FS=y
CONFIG_PACKET_DIAG=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI_HOST_GENERIC=y
@@ -149,7 +139,6 @@ CONFIG_TASK_XACCT=y
CONFIG_TCG_TIS=y
CONFIG_TCG_TPM=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_TMPFS=y
@@ -158,10 +147,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.ppc64el b/tools/testing/selftests/bpf/config.ppc64el
new file mode 100644
index 000000000000..b53afb5e0b71
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.ppc64el
@@ -0,0 +1,92 @@
+CONFIG_ALTIVEC=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_CPUSETS=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_FS=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FRAME_POINTER=y
+CONFIG_FRAME_WARN=1280
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_INET=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_MEMCG=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NONPORTABLE=y
+CONFIG_NR_CPUS=256
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PPC64=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_PSERIES=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_SMP=y
+CONFIG_SOC_VIRT=y
+CONFIG_SYSVIPC=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_THREAD_SHIFT=14
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_VSX=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.riscv64 b/tools/testing/selftests/bpf/config.riscv64
index bb7043a80e1a..7bee24a79a71 100644
--- a/tools/testing/selftests/bpf/config.riscv64
+++ b/tools/testing/selftests/bpf/config.riscv64
@@ -48,7 +48,6 @@ CONFIG_NET_VRF=y
CONFIG_NONPORTABLE=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
index 706931a8c2c6..db61878148e4 100644
--- a/tools/testing/selftests/bpf/config.s390x
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -22,10 +22,7 @@ CONFIG_CHECKPOINT_RESTORE=y
CONFIG_CPUSETS=y
CONFIG_CRASH_DUMP=y
CONFIG_CRYPTO_USER_API_RNG=y
-CONFIG_CRYPTO_USER_API_SKCIPHER=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
-CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_NOTIFIERS=y
@@ -56,11 +53,9 @@ CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_IKHEADERS=y
CONFIG_INET6_ESP=y
CONFIG_INET=y
-CONFIG_INET_ESP=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IPV6_SEG6_LWTUNNEL=y
CONFIG_IPVLAN=y
CONFIG_JUMP_LABEL=y
@@ -83,18 +78,14 @@ CONFIG_MEMORY_HOTREMOVE=y
CONFIG_NAMESPACES=y
CONFIG_NET=y
CONFIG_NET_ACT_BPF=y
-CONFIG_NET_ACT_GACT=y
CONFIG_NET_KEY=y
-CONFIG_NET_SCH_FQ=y
CONFIG_NET_VRF=y
CONFIG_NETDEVICES=y
CONFIG_NETFILTER_XT_MATCH_BPF=y
CONFIG_NETFILTER_XT_TARGET_MARK=y
-CONFIG_NF_TABLES=y
CONFIG_NO_HZ_IDLE=y
CONFIG_NR_CPUS=256
CONFIG_NUMA=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -119,7 +110,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_TASK_XACCT=y
CONFIG_TASKSTATS=y
CONFIG_TCP_CONG_ADVANCED=y
-CONFIG_TCP_CONG_DCTCP=y
CONFIG_TLS=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
@@ -128,10 +118,8 @@ CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_TUN=y
CONFIG_UNIX=y
CONFIG_UPROBES=y
-CONFIG_USELIB=y
CONFIG_USER_NS=y
CONFIG_VETH=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index 5680befae8c6..42ad817b00ae 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -39,13 +39,11 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_STAT=y
CONFIG_CPU_IDLE_GOV_LADDER=y
CONFIG_CPUSETS=y
-CONFIG_CRC_T10DIF=y
CONFIG_CRYPTO_BLAKE2B=y
CONFIG_CRYPTO_SEQIV=y
CONFIG_CRYPTO_XXHASH=y
CONFIG_DCB=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
-CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_DEFAULT_FQ_CODEL=y
@@ -105,12 +103,10 @@ CONFIG_HZ_1000=y
CONFIG_INET=y
CONFIG_INPUT_EVDEV=y
CONFIG_INTEL_POWERCLAMP=y
-CONFIG_IP6_NF_IPTABLES=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_MULTIPLE_TABLES=y
-CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_IP_ROUTE_MULTIPATH=y
@@ -163,7 +159,6 @@ CONFIG_NUMA=y
CONFIG_NUMA_BALANCING=y
CONFIG_NVMEM=y
CONFIG_OSF_PARTITION=y
-CONFIG_PACKET=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_PCI=y
@@ -221,7 +216,6 @@ CONFIG_VALIDATE_FS_PARSER=y
CONFIG_VETH=y
CONFIG_VIRT_DRIVERS=y
CONFIG_VLAN_8021Q=y
-CONFIG_VSOCKETS=y
CONFIG_VSOCKETS_LOOPBACK=y
CONFIG_X86_ACPI_CPUFREQ=y
CONFIG_X86_CPUID=y
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c
index d98c72dc563e..d32e4edac930 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_basic_ops.c
@@ -20,10 +20,12 @@
#include <string.h>
#include <time.h>
#include <unistd.h>
+#include <endian.h>
#include <arpa/inet.h>
#include <sys/time.h>
#include <bpf/bpf.h>
+#include <test_maps.h>
#include "bpf_util.h"
@@ -33,6 +35,22 @@ struct tlpm_node {
uint8_t key[];
};
+struct lpm_trie_bytes_key {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ unsigned char data[8];
+};
+
+struct lpm_trie_int_key {
+ union {
+ struct bpf_lpm_trie_key_hdr hdr;
+ __u32 prefixlen;
+ };
+ unsigned int data;
+};
+
static struct tlpm_node *tlpm_match(struct tlpm_node *list,
const uint8_t *key,
size_t n_bits);
@@ -223,7 +241,7 @@ static void test_lpm_map(int keysize)
n_matches = 0;
n_matches_after_delete = 0;
n_nodes = 1 << 8;
- n_lookups = 1 << 16;
+ n_lookups = 1 << 9;
data = alloca(keysize);
memset(data, 0, keysize);
@@ -770,16 +788,385 @@ static void test_lpm_multi_thread(void)
close(map_fd);
}
-int main(void)
+static int lpm_trie_create(unsigned int key_size, unsigned int value_size, unsigned int max_entries)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ int fd;
+
+ opts.map_flags = BPF_F_NO_PREALLOC;
+ fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie", key_size, value_size, max_entries,
+ &opts);
+ CHECK(fd < 0, "bpf_map_create", "error %d\n", errno);
+
+ return fd;
+}
+
+static void test_lpm_trie_update_flags(void)
+{
+ struct lpm_trie_int_key key;
+ unsigned int value, got;
+ int fd, err;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
+
+ /* invalid flags (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_F_LOCK);
+ CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
+
+ /* invalid flags (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST | BPF_EXIST);
+ CHECK(err != -EINVAL, "invalid update flag", "error %d\n", err);
+
+ /* overwrite an empty qp-trie (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err != -ENOENT, "overwrite empty qp-trie", "error %d\n", err);
+
+ /* add a new node */
+ key.prefixlen = 16;
+ key.data = 0;
+ value = 1;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add the same node as new node (Error) */
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err != -EEXIST, "add new elem again", "error %d\n", err);
+
+ /* overwrite the existing node */
+ value = 4;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* overwrite the node */
+ value = 1;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err, "update elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* overwrite a non-existent node which is the prefix of the first
+ * node (Error).
+ */
+ key.prefixlen = 8;
+ key.data = 0;
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
+
+ /* add a new node which is the prefix of the first node */
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup key", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add another new node which will be the sibling of the first node */
+ key.prefixlen = 9;
+ key.data = htobe32(1 << 23);
+ value = 5;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup key", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* overwrite the third node */
+ value = 3;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup key", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* delete the second node to make it an intermediate node */
+ key.prefixlen = 8;
+ key.data = 0;
+ err = bpf_map_delete_elem(fd, &key);
+ CHECK(err, "del elem", "error %d\n", err);
+
+ /* overwrite the intermediate node (Error) */
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err != -ENOENT, "overwrite nonexistent elem", "error %d\n", err);
+
+ close(fd);
+}
+
+static void test_lpm_trie_update_full_map(void)
+{
+ struct lpm_trie_int_key key;
+ int value, got;
+ int fd, err;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), 3);
+
+ /* add a new node */
+ key.prefixlen = 16;
+ key.data = 0;
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add new node */
+ key.prefixlen = 8;
+ key.data = 0;
+ value = 1;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* add new node */
+ key.prefixlen = 9;
+ key.data = htobe32(1 << 23);
+ value = 2;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add new elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* try to add more node (Error) */
+ key.prefixlen = 32;
+ key.data = 0;
+ value = 3;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err != -ENOSPC, "add to full trie", "error %d\n", err);
+
+ /* update the value of an existing node with BPF_EXIST */
+ key.prefixlen = 16;
+ key.data = 0;
+ value = 4;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_EXIST);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ /* update the value of an existing node with BPF_ANY */
+ key.prefixlen = 9;
+ key.data = htobe32(1 << 23);
+ value = 5;
+ err = bpf_map_update_elem(fd, &key, &value, BPF_ANY);
+ CHECK(err, "overwrite elem", "error %d\n", err);
+ got = 0;
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "error %d\n", err);
+ CHECK(got != value, "check value", "got %d exp %d\n", got, value);
+
+ close(fd);
+}
+
+static int cmp_str(const void *a, const void *b)
+{
+ const char *str_a = *(const char **)a, *str_b = *(const char **)b;
+
+ return strcmp(str_a, str_b);
+}
+
+/* Save strings in the LPM trie. The trailing '\0' of each string is
+ * accounted for in the prefixlen. The strings returned during iteration
+ * should come back in sorted order.
+ */
+static void test_lpm_trie_iterate_strs(void)
+{
+ static const char * const keys[] = {
+ "ab", "abO", "abc", "abo", "abS", "abcd",
+ };
+ const char *sorted_keys[ARRAY_SIZE(keys)];
+ struct lpm_trie_bytes_key key, next_key;
+ unsigned int value, got, i, j, len;
+ struct lpm_trie_bytes_key *cur;
+ int fd, err;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), ARRAY_SIZE(keys));
+
+ for (i = 0; i < ARRAY_SIZE(keys); i++) {
+ unsigned int flags;
+
+ /* add i-th element */
+ flags = i % 2 ? BPF_NOEXIST : 0;
+ len = strlen(keys[i]);
+ /* include the trailing '\0' */
+ key.prefixlen = (len + 1) * 8;
+ memset(key.data, 0, sizeof(key.data));
+ memcpy(key.data, keys[i], len);
+ value = i + 100;
+ err = bpf_map_update_elem(fd, &key, &value, flags);
+ CHECK(err, "add elem", "#%u error %d\n", i, err);
+
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "#%u error %d\n", i, err);
+ CHECK(got != value, "lookup elem", "#%u expect %u got %u\n", i, value, got);
+
+ /* re-add i-th element (Error) */
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err != -EEXIST, "re-add elem", "#%u error %d\n", i, err);
+
+ /* Overwrite i-th element */
+ flags = i % 2 ? 0 : BPF_EXIST;
+ value = i;
+ err = bpf_map_update_elem(fd, &key, &value, flags);
+ CHECK(err, "update elem", "error %d\n", err);
+
+ /* Lookup #[0~i] elements */
+ for (j = 0; j <= i; j++) {
+ len = strlen(keys[j]);
+ key.prefixlen = (len + 1) * 8;
+ memset(key.data, 0, sizeof(key.data));
+ memcpy(key.data, keys[j], len);
+ err = bpf_map_lookup_elem(fd, &key, &got);
+ CHECK(err, "lookup elem", "#%u/%u error %d\n", i, j, err);
+ CHECK(got != j, "lookup elem", "#%u/%u expect %u got %u\n",
+ i, j, j, got);
+ }
+ }
+
+ /* Add element to a full qp-trie (Error) */
+ key.prefixlen = sizeof(key.data) * 8;
+ memset(key.data, 0, sizeof(key.data));
+ value = 0;
+ err = bpf_map_update_elem(fd, &key, &value, 0);
+ CHECK(err != -ENOSPC, "add to full qp-trie", "error %d\n", err);
+
+ /* Iterate sorted elements: no deletion */
+ memcpy(sorted_keys, keys, sizeof(keys));
+ qsort(sorted_keys, ARRAY_SIZE(sorted_keys), sizeof(sorted_keys[0]), cmp_str);
+ cur = NULL;
+ for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
+ len = strlen(sorted_keys[i]);
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err, "iterate", "#%u error %d\n", i, err);
+ CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
+ "#%u invalid len %u expect %u\n",
+ i, next_key.prefixlen, (len + 1) * 8);
+ CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
+ "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);
+
+ cur = &next_key;
+ }
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err != -ENOENT, "more element", "error %d\n", err);
+
+ /* Iterate sorted elements: delete the found key after each iteration */
+ cur = NULL;
+ for (i = 0; i < ARRAY_SIZE(sorted_keys); i++) {
+ len = strlen(sorted_keys[i]);
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err, "iterate", "#%u error %d\n", i, err);
+ CHECK(next_key.prefixlen != (len + 1) * 8, "iterate",
+ "#%u invalid len %u expect %u\n",
+ i, next_key.prefixlen, (len + 1) * 8);
+ CHECK(memcmp(sorted_keys[i], next_key.data, len + 1), "iterate",
+ "#%u got %.*s exp %.*s\n", i, len, next_key.data, len, sorted_keys[i]);
+
+ cur = &next_key;
+
+ err = bpf_map_delete_elem(fd, cur);
+ CHECK(err, "delete", "#%u error %d\n", i, err);
+ }
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err);
+
+ close(fd);
+}
+
+/* Use a fixed prefixlen (32) and save integers in the LPM trie. Iteration of
+ * the LPM trie returns these integers in big-endian order, so convert them
+ * to big-endian before the update. After each iteration, delete the found
+ * key (the smallest integer) and expect the next iteration to return the
+ * next smallest number.
+ */
+static void test_lpm_trie_iterate_ints(void)
+{
+ struct lpm_trie_int_key key, next_key;
+ unsigned int i, max_entries;
+ struct lpm_trie_int_key *cur;
+ unsigned int *data_set;
+ int fd, err;
+ bool value;
+
+ max_entries = 4096;
+ data_set = calloc(max_entries, sizeof(*data_set));
+ CHECK(!data_set, "malloc", "no mem\n");
+ for (i = 0; i < max_entries; i++)
+ data_set[i] = i;
+
+ fd = lpm_trie_create(sizeof(key), sizeof(value), max_entries);
+ value = true;
+ for (i = 0; i < max_entries; i++) {
+ key.prefixlen = 32;
+ key.data = htobe32(data_set[i]);
+
+ err = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ CHECK(err, "add elem", "#%u error %d\n", i, err);
+ }
+
+ cur = NULL;
+ for (i = 0; i < max_entries; i++) {
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err, "iterate", "#%u error %d\n", i, err);
+ CHECK(next_key.prefixlen != 32, "iterate", "#%u invalid len %u\n",
+ i, next_key.prefixlen);
+ CHECK(be32toh(next_key.data) != data_set[i], "iterate", "#%u got 0x%x exp 0x%x\n",
+ i, be32toh(next_key.data), data_set[i]);
+ cur = &next_key;
+
+ /*
+ * Delete the minimal key; the next call of bpf_map_get_next_key()
+ * will return the next smallest key.
+ */
+ err = bpf_map_delete_elem(fd, &next_key);
+ CHECK(err, "del elem", "#%u elem error %d\n", i, err);
+ }
+ err = bpf_map_get_next_key(fd, cur, &next_key);
+ CHECK(err != -ENOENT, "more element", "error %d\n", err);
+
+ err = bpf_map_get_next_key(fd, NULL, &next_key);
+ CHECK(err != -ENOENT, "non-empty qp-trie", "error %d\n", err);
+
+ free(data_set);
+
+ close(fd);
+}
+
+void test_lpm_trie_map_basic_ops(void)
{
int i;
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
test_lpm_basic();
test_lpm_order();
@@ -792,6 +1179,10 @@ int main(void)
test_lpm_get_next_key();
test_lpm_multi_thread();
- printf("test_lpm: OK\n");
- return 0;
+ test_lpm_trie_update_flags();
+ test_lpm_trie_update_full_map();
+ test_lpm_trie_iterate_strs();
+ test_lpm_trie_iterate_ints();
+
+ printf("%s: PASS\n", __func__);
}
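
To make the big-endian key convention used by these tests concrete, here is a hedged sketch (not part of the patch) of how a /24 IPv4 prefix would be encoded with the lpm_trie_int_key layout defined above; the address is an arbitrary example and <endian.h> is already included by this file.

/* Hypothetical key-encoding sketch: 192.168.1.0/24. */
static void encode_ipv4_prefix(struct lpm_trie_int_key *key)
{
	key->prefixlen = 24;			/* match the top 24 bits */
	key->data = htobe32(0xc0a80100);	/* data is in network byte order */
}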
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
index 66191ae9863c..79c3ccadb962 100644
--- a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -120,11 +120,12 @@ static void validate_fetch_results(int outer_map_fd,
static void fetch_and_validate(int outer_map_fd,
struct bpf_map_batch_opts *opts,
- __u32 batch_size, bool delete_entries)
+ __u32 batch_size, bool delete_entries,
+ bool has_holes)
{
- __u32 *fetched_keys, *fetched_values, total_fetched = 0;
- __u32 batch_key = 0, fetch_count, step_size;
- int err, max_entries = OUTER_MAP_ENTRIES;
+ int err, max_entries = OUTER_MAP_ENTRIES - !!has_holes;
+ __u32 *fetched_keys, *fetched_values, total_fetched = 0, i;
+ __u32 batch_key = 0, fetch_count, step_size = batch_size;
__u32 value_size = sizeof(__u32);
/* Total entries needs to be fetched */
@@ -134,9 +135,8 @@ static void fetch_and_validate(int outer_map_fd,
"Memory allocation failed for fetched_keys or fetched_values",
"error=%s\n", strerror(errno));
- for (step_size = batch_size;
- step_size <= max_entries;
- step_size += batch_size) {
+ /* hash map may not always return full batch */
+ for (i = 0; i < OUTER_MAP_ENTRIES; i++) {
fetch_count = step_size;
err = delete_entries
? bpf_map_lookup_and_delete_batch(outer_map_fd,
@@ -155,6 +155,7 @@ static void fetch_and_validate(int outer_map_fd,
if (err && errno == ENOSPC) {
/* Fetch again with higher batch size */
total_fetched = 0;
+ step_size += batch_size;
continue;
}
@@ -184,18 +185,19 @@ static void fetch_and_validate(int outer_map_fd,
}
static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
- enum bpf_map_type inner_map_type)
+ enum bpf_map_type inner_map_type,
+ bool has_holes)
{
+ __u32 max_entries = OUTER_MAP_ENTRIES - !!has_holes;
__u32 *outer_map_keys, *inner_map_fds;
- __u32 max_entries = OUTER_MAP_ENTRIES;
LIBBPF_OPTS(bpf_map_batch_opts, opts);
__u32 value_size = sizeof(__u32);
int batch_size[2] = {5, 10};
__u32 map_index, op_index;
int outer_map_fd, ret;
- outer_map_keys = calloc(max_entries, value_size);
- inner_map_fds = calloc(max_entries, value_size);
+ outer_map_keys = calloc(OUTER_MAP_ENTRIES, value_size);
+ inner_map_fds = calloc(OUTER_MAP_ENTRIES, value_size);
CHECK((!outer_map_keys || !inner_map_fds),
"Memory allocation failed for outer_map_keys or inner_map_fds",
"error=%s\n", strerror(errno));
@@ -209,6 +211,24 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
? 9 : 1000) - map_index;
+ /* This condition is only meaningful for array of maps.
+ *
+ * If has_holes is true, max_entries == OUTER_MAP_ENTRIES - 1. With
+ * max_entries written as n, outer_map_keys looks like:
+ *
+ * [n, n-1, ... 2, 1]
+ *
+ * We change it to
+ *
+ * [n, n-1, ... 2, 0]
+ *
+ * so key 1 is left as a hole. This tests correctness when batching
+ * on an array: a "non-existent" key might actually be allocated and
+ * returned during key iteration.
+ */
+ if (has_holes)
+ outer_map_keys[max_entries - 1]--;
+
/* batch operation - map_update */
ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
inner_map_fds, &max_entries, &opts);
@@ -219,15 +239,17 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
/* batch operation - map_lookup */
for (op_index = 0; op_index < 2; ++op_index)
fetch_and_validate(outer_map_fd, &opts,
- batch_size[op_index], false);
+ batch_size[op_index], false,
+ has_holes);
/* batch operation - map_lookup_delete */
if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
fetch_and_validate(outer_map_fd, &opts,
- max_entries, true /*delete*/);
+ max_entries, true /*delete*/,
+ has_holes);
/* close all map fds */
- for (map_index = 0; map_index < max_entries; map_index++)
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++)
close(inner_map_fds[map_index]);
close(outer_map_fd);
@@ -237,16 +259,20 @@ static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
void test_map_in_map_batch_ops_array(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY, true);
+ printf("%s:PASS with inner ARRAY map with holes\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH, true);
+ printf("%s:PASS with inner HASH map with holes\n", __func__);
}
void test_map_in_map_batch_ops_hash(void)
{
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY, false);
printf("%s:PASS with inner ARRAY map\n", __func__);
- _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH, false);
printf("%s:PASS with inner HASH map\n", __func__);
}
diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
index 62971dbf2996..a4121d2248ac 100644
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -78,8 +78,8 @@ void test_task_storage_map_stress_lookup(void)
CHECK(err, "open_and_load", "error %d\n", err);
/* Only for a fully preemptible kernel */
- if (!skel->kconfig->CONFIG_PREEMPT) {
- printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__);
+ if (!skel->kconfig->CONFIG_PREEMPTION) {
+ printf("%s SKIP (no CONFIG_PREEMPTION)\n", __func__);
read_bpf_task_storage_busy__destroy(skel);
skips++;
return;
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 27784946b01b..0a6a5561bed3 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -21,7 +21,7 @@
#include <linux/limits.h>
#include <linux/ip.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <net/if.h>
@@ -97,7 +97,7 @@ int settimeo(int fd, int timeout_ms)
int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
const struct network_helper_opts *opts)
{
- int fd;
+ int on = 1, fd;
if (!opts)
opts = &default_opts;
@@ -111,6 +111,12 @@ int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t a
if (settimeo(fd, opts->timeout_ms))
goto error_close;
+ if (type == SOCK_STREAM &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on))) {
+ log_err("Failed to enable SO_REUSEADDR");
+ goto error_close;
+ }
+
if (opts->post_socket_cb &&
opts->post_socket_cb(fd, opts->cb_opts)) {
log_err("Failed to call post_socket_cb");
@@ -446,6 +452,23 @@ char *ping_command(int family)
return "ping";
}
+int append_tid(char *str, size_t sz)
+{
+ size_t end;
+
+ if (!str)
+ return -1;
+
+ end = strlen(str);
+ if (end + 8 > sz)
+ return -1;
+
+ sprintf(&str[end], "%07ld", sys_gettid());
+ str[end + 7] = '\0';
+
+ return 0;
+}
+
int remove_netns(const char *name)
{
char *cmd;
@@ -548,6 +571,34 @@ void close_netns(struct nstoken *token)
free(token);
}
+int open_tuntap(const char *dev_name, bool need_mac)
+{
+ int err = 0;
+ struct ifreq ifr;
+ int fd = open("/dev/net/tun", O_RDWR);
+
+ if (!ASSERT_GE(fd, 0, "open(/dev/net/tun)"))
+ return -1;
+
+ ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
+ strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
+ close(fd);
+ return -1;
+ }
+
+ err = fcntl(fd, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
int get_socket_local_port(int sock_fd)
{
struct sockaddr_storage addr;
@@ -721,6 +772,50 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes)
return err;
}
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd)
+{
+ int ifindex, ret;
+
+ if (!ASSERT_TRUE(ingress_fd >= 0 || egress_fd >= 0,
+ "at least one program fd is valid"))
+ return -1;
+
+ ifindex = if_nametoindex(dev);
+ if (!ASSERT_NEQ(ifindex, 0, "get ifindex"))
+ return -1;
+
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+ .priority = 1, .prog_fd = ingress_fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+ .priority = 1, .prog_fd = egress_fd);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (ingress_fd >= 0) {
+ hook.attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(&hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ if (egress_fd >= 0) {
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(&hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx {
pcap_t *pcap;
@@ -733,6 +828,36 @@ struct tmonitor_ctx {
int pcap_fd;
};
+static int __base_pr(const char *format, va_list args)
+{
+ return vfprintf(stdout, format, args);
+}
+
+static tm_print_fn_t __tm_pr = __base_pr;
+
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ tm_print_fn_t old_print_fn;
+
+ old_print_fn = __atomic_exchange_n(&__tm_pr, fn, __ATOMIC_RELAXED);
+
+ return old_print_fn;
+}
+
+void tm_print(const char *format, ...)
+{
+ tm_print_fn_t print_fn;
+ va_list args;
+
+ print_fn = __atomic_load_n(&__tm_pr, __ATOMIC_RELAXED);
+ if (!print_fn)
+ return;
+
+ va_start(args, format);
+ print_fn(format, args);
+ va_end(args);
+}
+
/* Is this packet captured with a Ethernet protocol type? */
static bool is_ethernet(const u_char *packet)
{
@@ -750,7 +875,7 @@ static bool is_ethernet(const u_char *packet)
case 770: /* ARPHRD_FRAD */
case 778: /* ARPHDR_IPGRE */
case 803: /* ARPHRD_IEEE80211_RADIOTAP */
- printf("Packet captured: arphdr_type=%d\n", arphdr_type);
+ tm_print("Packet captured: arphdr_type=%d\n", arphdr_type);
return false;
}
return true;
@@ -771,12 +896,13 @@ static const char *pkt_type_str(u16 pkt_type)
return "Unknown";
}
+#define MAX_FLAGS_STRLEN 21
/* Show the information of the transport layer in the packet */
static void show_transport(const u_char *packet, u16 len, u32 ifindex,
const char *src_addr, const char *dst_addr,
u16 proto, bool ipv6, u8 pkt_type)
{
- char *ifname, _ifname[IF_NAMESIZE];
+ char *ifname, _ifname[IF_NAMESIZE], flags[MAX_FLAGS_STRLEN] = "";
const char *transport_str;
u16 src_port, dst_port;
struct udphdr *udp;
@@ -799,47 +925,39 @@ static void show_transport(const u_char *packet, u16 len, u32 ifindex,
dst_port = ntohs(tcp->dest);
transport_str = "TCP";
} else if (proto == IPPROTO_ICMP) {
- printf("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv4 %s > %s: ICMP, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else if (proto == IPPROTO_ICMPV6) {
- printf("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
- ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
- packet[0], packet[1]);
+ tm_print("%-7s %-3s IPv6 %s > %s: ICMPv6, length %d, type %d, code %d\n",
+ ifname, pkt_type_str(pkt_type), src_addr, dst_addr, len,
+ packet[0], packet[1]);
return;
} else {
- printf("%-7s %-3s %s %s > %s: protocol %d\n",
- ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
- src_addr, dst_addr, proto);
+ tm_print("%-7s %-3s %s %s > %s: protocol %d\n",
+ ifname, pkt_type_str(pkt_type), ipv6 ? "IPv6" : "IPv4",
+ src_addr, dst_addr, proto);
return;
}
/* TCP or UDP*/
- flockfile(stdout);
+ if (proto == IPPROTO_TCP)
+ snprintf(flags, MAX_FLAGS_STRLEN, "%s%s%s%s",
+ tcp->fin ? ", FIN" : "",
+ tcp->syn ? ", SYN" : "",
+ tcp->rst ? ", RST" : "",
+ tcp->ack ? ", ACK" : "");
+
if (ipv6)
- printf("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
+ tm_print("%-7s %-3s IPv6 %s.%d > %s.%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
else
- printf("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d",
- ifname, pkt_type_str(pkt_type), src_addr, src_port,
- dst_addr, dst_port, transport_str, len);
-
- if (proto == IPPROTO_TCP) {
- if (tcp->fin)
- printf(", FIN");
- if (tcp->syn)
- printf(", SYN");
- if (tcp->rst)
- printf(", RST");
- if (tcp->ack)
- printf(", ACK");
- }
-
- printf("\n");
- funlockfile(stdout);
+ tm_print("%-7s %-3s IPv4 %s:%d > %s:%d: %s, length %d%s\n",
+ ifname, pkt_type_str(pkt_type), src_addr, src_port,
+ dst_addr, dst_port, transport_str, len, flags);
}
static void show_ipv6_packet(const u_char *packet, u32 ifindex, u8 pkt_type)
@@ -954,8 +1072,8 @@ static void *traffic_monitor_thread(void *arg)
ifname = _ifname;
}
- printf("%-7s %-3s Unknown network protocol type 0x%x\n",
- ifname, pkt_type_str(ptype), proto);
+ tm_print("%-7s %-3s Unknown network protocol type 0x%x\n",
+ ifname, pkt_type_str(ptype), proto);
}
}
@@ -1155,8 +1273,9 @@ void traffic_monitor_stop(struct tmonitor_ctx *ctx)
write(ctx->wake_fd, &w, sizeof(w));
pthread_join(ctx->thread, NULL);
- printf("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
+ tm_print("Packet file: %s\n", strrchr(ctx->pkt_fname, '/') + 1);
traffic_monitor_release(ctx);
}
+
#endif /* TRAFFIC_MONITOR */
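
A hedged sketch (not part of the patch) combining two of the new helpers: open a TAP device and attach an ingress-only tc program to it. The device name, function name, and error handling are illustrative assumptions; <unistd.h> and network_helpers.h are assumed to be included.

/* Hypothetical usage sketch; -1 for the egress fd means "ingress only". */
static int tap_with_ingress_prog(int ingress_prog_fd)
{
	int tap_fd;

	tap_fd = open_tuntap("tap_test", true /* need_mac */);
	if (tap_fd < 0)
		return -1;

	if (tc_prog_attach("tap_test", ingress_prog_fd, -1)) {
		close(tap_fd);
		return -1;
	}
	return tap_fd;
}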
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 5764155b6d25..79a010c88e11 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -8,14 +8,17 @@
typedef __u16 __sum16;
#include <linux/if_ether.h>
#include <linux/if_packet.h>
+#include <linux/if_tun.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <linux/err.h>
#include <netinet/tcp.h>
+#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <net/if.h>
+#include <stdio.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
@@ -84,6 +87,8 @@ int get_socket_local_port(int sock_fd);
int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+int open_tuntap(const char *dev_name, bool need_mac);
+
struct nstoken;
/**
* open_netns() - Switch to specified network namespace by name.
@@ -97,6 +102,18 @@ int send_recv_data(int lfd, int fd, uint32_t total_bytes);
int make_netns(const char *name);
int remove_netns(const char *name);
+/**
+ * append_tid() - Append thread ID to the given string.
+ *
+ * @str: string to extend
+ * @sz: string's size
+ *
+ * 8 characters are used to append the thread ID (7 digits + '\0')
+ *
+ * Returns -1 on errors, 0 otherwise
+ */
+int append_tid(char *str, size_t sz);
+
static __u16 csum_fold(__u32 csum)
{
csum = (csum & 0xffff) + (csum >> 16);
@@ -105,6 +122,45 @@ static __u16 csum_fold(__u32 csum)
return (__u16)~csum;
}
+static __wsum csum_partial(const void *buf, int len, __wsum sum)
+{
+ __u16 *p = (__u16 *)buf;
+ int num_u16 = len >> 1;
+ int i;
+
+ for (i = 0; i < num_u16; i++)
+ sum += p[i];
+
+ return sum;
+}
+
+static inline __sum16 build_ip_csum(struct iphdr *iph)
+{
+ __u32 sum = 0;
+ __u16 *p;
+
+ iph->check = 0;
+ p = (void *)iph;
+ sum = csum_partial(p, iph->ihl << 2, 0);
+
+ return csum_fold(sum);
+}
+
+/**
+ * csum_tcpudp_magic - compute IP pseudo-header checksum
+ *
+ * Compute the IPv4 pseudo-header checksum. The helper can take an
+ * accumulated sum from the transport layer and fold it into the
+ * computation, returning the transport-layer checksum.
+ *
+ * @saddr: IP source address
+ * @daddr: IP dest address
+ * @len: IP data size
+ * @proto: transport layer protocol
+ * @csum: The accumulated partial sum to add to the computation
+ *
+ * Returns the folded sum
+ */
static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
__u32 len, __u8 proto,
__wsum csum)
@@ -120,6 +176,21 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
return csum_fold((__u32)s);
}
+/**
+ * csum_ipv6_magic - compute IPv6 pseudo-header checksum
+ *
+ * Compute the IPv6 pseudo-header checksum. The helper can take an
+ * accumulated sum from the transport layer and fold it into the
+ * computation, returning the transport-layer checksum.
+ *
+ * @saddr: IPv6 source address
+ * @daddr: IPv6 dest address
+ * @len: IPv6 data size
+ * @proto: transport layer protocol
+ * @csum: The accumulated partial sum to add to the computation
+ *
+ * Returns the folded sum
+ */
static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
const struct in6_addr *daddr,
__u32 len, __u8 proto,
@@ -139,12 +210,72 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
return csum_fold((__u32)s);
}
+/**
+ * build_udp_v4_csum - compute UDP checksum for UDP over IPv4
+ *
+ * Compute the checksum to embed in UDP header, composed of the sum of IP
+ * pseudo-header checksum, UDP header checksum and UDP data checksum
+ * @iph: IP header
+ * @udph: UDP header, which must be immediately followed by UDP data
+ *
+ * Returns the total checksum
+ */
+
+static inline __sum16 build_udp_v4_csum(const struct iphdr *iph,
+ const struct udphdr *udph)
+{
+ unsigned long sum;
+
+ sum = csum_partial(udph, ntohs(udph->len), 0);
+ return csum_tcpudp_magic(iph->saddr, iph->daddr, ntohs(udph->len),
+ IPPROTO_UDP, sum);
+}
+
+/**
+ * build_udp_v6_csum - compute UDP checksum for UDP over IPv6
+ *
+ * Compute the checksum to embed in UDP header, composed of the sum of IPv6
+ * pseudo-header checksum, UDP header checksum and UDP data checksum
+ * @ip6h: IPv6 header
+ * @udph: UDP header, which must be immediately followed by UDP data
+ *
+ * Returns the total checksum
+ */
+static inline __sum16 build_udp_v6_csum(const struct ipv6hdr *ip6h,
+ const struct udphdr *udph)
+{
+ unsigned long sum;
+
+ sum = csum_partial(udph, ntohs(udph->len), 0);
+ return csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ntohs(udph->len),
+ IPPROTO_UDP, sum);
+}
+
struct tmonitor_ctx;
+typedef int (*tm_print_fn_t)(const char *format, va_list args);
+
+/**
+ * tc_prog_attach - attach BPF program(s) to an interface
+ *
+ * Takes file descriptors pointing to at least one and at most two BPF
+ * programs, and attaches those programs to an interface's ingress,
+ * egress, or both.
+ *
+ * @dev: string containing the interface name
+ * @ingress_fd: file descriptor of the program to attach to interface ingress
+ * @egress_fd: file descriptor of the program to attach to interface egress
+ *
+ * Returns 0 on success, -1 if no valid file descriptor has been found, if
+ * the interface name is invalid, or if an error occurred during attach.
+ */
+int tc_prog_attach(const char *dev, int ingress_fd, int egress_fd);
+
#ifdef TRAFFIC_MONITOR
struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name);
void traffic_monitor_stop(struct tmonitor_ctx *ctx);
+tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn);
#else
static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, const char *test_name,
const char *subtest_name)
@@ -155,6 +286,11 @@ static inline struct tmonitor_ctx *traffic_monitor_start(const char *netns, cons
static inline void traffic_monitor_stop(struct tmonitor_ctx *ctx)
{
}
+
+static inline tm_print_fn_t traffic_monitor_set_print(tm_print_fn_t fn)
+{
+ return NULL;
+}
#endif
#endif
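A minimal usage sketch for the checksum helpers added above, assuming a hand-built IPv4/UDP packet with no IP options so the UDP header immediately follows the IP header (the helper name fill_udp4_csums is hypothetical and not part of this patch):

	static void fill_udp4_csums(struct iphdr *iph, int payload_len)
	{
		/* no IP options assumed: UDP header immediately follows iphdr,
		 * and payload_len bytes of UDP data already sit right after udph
		 */
		struct udphdr *udph = (struct udphdr *)(iph + 1);

		udph->len = htons(sizeof(*udph) + payload_len);
		udph->check = 0;			/* zero before summing the UDP segment */
		udph->check = build_udp_v4_csum(iph, udph);
		iph->check = build_ip_csum(iph);	/* build_ip_csum() zeroes iph->check itself */
	}

Two caveats grounded in the helpers above: csum_partial() only sums whole 16-bit words, so an odd trailing payload byte is not included, and a real sender would also map a computed UDP checksum of 0 to 0xffff per RFC 768.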
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index 4ebd0da898f5..24c509ce4e5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -42,11 +42,11 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "2"},
- {1, "R3_w", "4"},
- {2, "R3_w", "8"},
- {3, "R3_w", "16"},
- {4, "R3_w", "32"},
+ {0, "R3", "2"},
+ {1, "R3", "4"},
+ {2, "R3", "8"},
+ {3, "R3", "16"},
+ {4, "R3", "32"},
},
},
{
@@ -70,17 +70,17 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "1"},
- {1, "R3_w", "2"},
- {2, "R3_w", "4"},
- {3, "R3_w", "8"},
- {4, "R3_w", "16"},
- {5, "R3_w", "1"},
- {6, "R4_w", "32"},
- {7, "R4_w", "16"},
- {8, "R4_w", "8"},
- {9, "R4_w", "4"},
- {10, "R4_w", "2"},
+ {0, "R3", "1"},
+ {1, "R3", "2"},
+ {2, "R3", "4"},
+ {3, "R3", "8"},
+ {4, "R3", "16"},
+ {5, "R3", "1"},
+ {6, "R4", "32"},
+ {7, "R4", "16"},
+ {8, "R4", "8"},
+ {9, "R4", "4"},
+ {10, "R4", "2"},
},
},
{
@@ -99,12 +99,12 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "4"},
- {1, "R3_w", "8"},
- {2, "R3_w", "10"},
- {3, "R4_w", "8"},
- {4, "R4_w", "12"},
- {5, "R4_w", "14"},
+ {0, "R3", "4"},
+ {1, "R3", "8"},
+ {2, "R3", "10"},
+ {3, "R4", "8"},
+ {4, "R4", "12"},
+ {5, "R4", "14"},
},
},
{
@@ -121,10 +121,10 @@ static struct bpf_align_test tests[] = {
.matches = {
{0, "R1", "ctx()"},
{0, "R10", "fp0"},
- {0, "R3_w", "7"},
- {1, "R3_w", "7"},
- {2, "R3_w", "14"},
- {3, "R3_w", "56"},
+ {0, "R3", "7"},
+ {1, "R3", "7"},
+ {2, "R3", "14"},
+ {3, "R3", "56"},
},
},
@@ -162,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R0_w", "pkt(off=8,r=8)"},
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R3_w", "var_off=(0x0; 0x1fe)"},
- {8, "R3_w", "var_off=(0x0; 0x3fc)"},
- {9, "R3_w", "var_off=(0x0; 0x7f8)"},
- {10, "R3_w", "var_off=(0x0; 0xff0)"},
- {12, "R3_w", "pkt_end()"},
- {17, "R4_w", "var_off=(0x0; 0xff)"},
- {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
- {19, "R4_w", "var_off=(0x0; 0xff0)"},
- {20, "R4_w", "var_off=(0x0; 0x7f8)"},
- {21, "R4_w", "var_off=(0x0; 0x3fc)"},
- {22, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {6, "R0", "pkt(off=8,r=8)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R3", "var_off=(0x0; 0x1fe)"},
+ {8, "R3", "var_off=(0x0; 0x3fc)"},
+ {9, "R3", "var_off=(0x0; 0x7f8)"},
+ {10, "R3", "var_off=(0x0; 0xff0)"},
+ {12, "R3", "pkt_end()"},
+ {17, "R4", "var_off=(0x0; 0xff)"},
+ {18, "R4", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4", "var_off=(0x0; 0xff0)"},
+ {20, "R4", "var_off=(0x0; 0x7f8)"},
+ {21, "R4", "var_off=(0x0; 0x3fc)"},
+ {22, "R4", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -195,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {6, "R3_w", "var_off=(0x0; 0xff)"},
- {7, "R4_w", "var_off=(0x0; 0xff)"},
- {8, "R4_w", "var_off=(0x0; 0xff)"},
- {9, "R4_w", "var_off=(0x0; 0xff)"},
- {10, "R4_w", "var_off=(0x0; 0x1fe)"},
- {11, "R4_w", "var_off=(0x0; 0xff)"},
- {12, "R4_w", "var_off=(0x0; 0x3fc)"},
- {13, "R4_w", "var_off=(0x0; 0xff)"},
- {14, "R4_w", "var_off=(0x0; 0x7f8)"},
- {15, "R4_w", "var_off=(0x0; 0xff0)"},
+ {6, "R3", "var_off=(0x0; 0xff)"},
+ {7, "R4", "var_off=(0x0; 0xff)"},
+ {8, "R4", "var_off=(0x0; 0xff)"},
+ {9, "R4", "var_off=(0x0; 0xff)"},
+ {10, "R4", "var_off=(0x0; 0x1fe)"},
+ {11, "R4", "var_off=(0x0; 0xff)"},
+ {12, "R4", "var_off=(0x0; 0x3fc)"},
+ {13, "R4", "var_off=(0x0; 0xff)"},
+ {14, "R4", "var_off=(0x0; 0x7f8)"},
+ {15, "R4", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -235,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {2, "R5_w", "pkt(r=0)"},
- {4, "R5_w", "pkt(off=14,r=0)"},
- {5, "R4_w", "pkt(off=14,r=0)"},
+ {2, "R5", "pkt(r=0)"},
+ {4, "R5", "pkt(off=14,r=0)"},
+ {5, "R4", "pkt(off=14,r=0)"},
{9, "R2", "pkt(r=18)"},
{10, "R5", "pkt(off=14,r=18)"},
- {10, "R4_w", "var_off=(0x0; 0xff)"},
- {13, "R4_w", "var_off=(0x0; 0xffff)"},
- {14, "R4_w", "var_off=(0x0; 0xffff)"},
+ {10, "R4", "var_off=(0x0; 0xff)"},
+ {13, "R4", "var_off=(0x0; 0xffff)"},
+ {14, "R4", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -299,12 +299,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w", "pkt(id=1,off=14,"},
+ {11, "R5", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
@@ -320,12 +320,12 @@ static struct bpf_align_test tests[] = {
* instruction to validate R5 state. We also check
* that R4 is what it should be in such case.
*/
- {18, "R4_w", "var_off=(0x0; 0x3fc)"},
- {18, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R4", "var_off=(0x0; 0x3fc)"},
+ {18, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w", "pkt(id=2,off=14,"},
+ {19, "R5", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
@@ -337,21 +337,21 @@ static struct bpf_align_test tests[] = {
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w", "pkt(off=14,r=8)"},
+ {26, "R5", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
* variable offset of (4n). See comment for insn #18
* for R4 = R5 trick.
*/
- {28, "R4_w", "var_off=(0x0; 0x3fc)"},
- {28, "R5_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R4", "var_off=(0x0; 0x3fc)"},
+ {28, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {29, "R5_w", "pkt(id=3,off=18,"},
+ {29, "R5", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {31, "R4_w", "var_off=(0x0; 0x7fc)"},
- {31, "R5_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R4", "var_off=(0x0; 0x7fc)"},
+ {31, "R5", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
@@ -397,12 +397,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {7, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {7, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {8, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {8, "R6", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {11, "R5", "var_off=(0x2; 0x7fc)"},
{12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -414,11 +414,11 @@ static struct bpf_align_test tests[] = {
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {17, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {17, "R6", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {19, "R5", "var_off=(0x2; 0xffc)"},
{20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
@@ -459,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {3, "R5_w", "pkt_end()"},
+ {3, "R5", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
{9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
- {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -478,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {15, "R6", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -513,12 +513,12 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {8, "R6_w", "var_off=(0x0; 0x3fc)"},
+ {6, "R2", "pkt(r=8)"},
+ {8, "R6", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w", "var_off=(0x2; 0x7fc)"},
+ {9, "R6", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {10, "R7_w", "var_off=(0x0; 0x3fc)"},
+ {10, "R7", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
{11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
@@ -566,16 +566,16 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {6, "R2_w", "pkt(r=8)"},
- {9, "R6_w", "var_off=(0x0; 0x3c)"},
+ {6, "R2", "pkt(r=8)"},
+ {9, "R6", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w", "var_off=(0x2; 0x7c)"},
+ {10, "R6", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {14, "R7_w", "var_off=(0x0; 0x7fc)"},
+ {14, "R7", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {16, "R5", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
@@ -610,9 +610,11 @@ static int do_test_single(struct bpf_align_test *test)
.log_size = sizeof(bpf_vlog),
.log_level = 2,
);
+ const char *main_pass_start = "0: R1=ctx() R10=fp0";
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
+ char *start;
int fd_prog;
int ret;
@@ -632,7 +634,13 @@ static int do_test_single(struct bpf_align_test *test)
ret = 0;
/* We make a local copy so that we can strtok() it */
strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
- line_ptr = strtok(bpf_vlog_copy, "\n");
+ start = strstr(bpf_vlog_copy, main_pass_start);
+ if (!start) {
+ ret = 1;
+ printf("Can't find initial line '%s'\n", main_pass_start);
+ goto out;
+ }
+ line_ptr = strtok(start, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
const char *p;
@@ -682,6 +690,7 @@ static int do_test_single(struct bpf_align_test *test)
break;
}
}
+out:
if (fd_prog >= 0)
close(fd_prog);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
index 26e7c06c6cb4..d98577a6babc 100644
--- a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -162,6 +162,66 @@ static void test_uaf(struct arena_atomics *skel)
ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
}
+static void test_load_acquire(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support load-acquire\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.load_acquire);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->load_acquire8_result, 0x12,
+ "load_acquire8_result");
+ ASSERT_EQ(skel->arena->load_acquire16_result, 0x1234,
+ "load_acquire16_result");
+ ASSERT_EQ(skel->arena->load_acquire32_result, 0x12345678,
+ "load_acquire32_result");
+ ASSERT_EQ(skel->arena->load_acquire64_result, 0x1234567890abcdef,
+ "load_acquire64_result");
+}
+
+static void test_store_release(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ if (skel->data->skip_lacq_srel_tests) {
+ printf("%s:SKIP: ENABLE_ATOMICS_TESTS not defined, Clang doesn't support addr_space_cast, and/or JIT doesn't support store-release\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.store_release);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->store_release8_result, 0x12,
+ "store_release8_result");
+ ASSERT_EQ(skel->arena->store_release16_result, 0x1234,
+ "store_release16_result");
+ ASSERT_EQ(skel->arena->store_release32_result, 0x12345678,
+ "store_release32_result");
+ ASSERT_EQ(skel->arena->store_release64_result, 0x1234567890abcdef,
+ "store_release64_result");
+}
+
void test_arena_atomics(void)
{
struct arena_atomics *skel;
@@ -171,7 +231,7 @@ void test_arena_atomics(void)
if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
return;
- if (skel->data->skip_tests) {
+ if (skel->data->skip_all_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
__func__);
test__skip();
@@ -198,6 +258,10 @@ void test_arena_atomics(void)
test_xchg(skel);
if (test__start_subtest("uaf"))
test_uaf(skel);
+ if (test__start_subtest("load_acquire"))
+ test_load_acquire(skel);
+ if (test__start_subtest("store_release"))
+ test_store_release(skel);
cleanup:
arena_atomics__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
new file mode 100644
index 000000000000..693fd86fbde6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_spin_lock.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+struct __qspinlock { int val; };
+typedef struct __qspinlock arena_spinlock_t;
+
+struct arena_qnode {
+ unsigned long next;
+ int count;
+ int locked;
+};
+
+#include "arena_spin_lock.skel.h"
+
+static long cpu;
+static int repeat;
+
+pthread_barrier_t barrier;
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = repeat,
+ );
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset), "cpu affinity");
+
+ err = pthread_barrier_wait(&barrier);
+ if (err != PTHREAD_BARRIER_SERIAL_THREAD && err != 0)
+ ASSERT_FALSE(true, "pthread_barrier");
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run err");
+
+ if (topts.retval == -EOPNOTSUPP)
+ goto end;
+
+ ASSERT_EQ((int)topts.retval, 0, "test_run retval");
+
+end:
+ pthread_exit(arg);
+}
+
+static void test_arena_spin_lock_size(int size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct arena_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ int nthreads;
+ void *ret;
+
+ nthreads = MIN(get_nprocs(), ARRAY_SIZE(thread_id));
+ if (nthreads < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = arena_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_spin_lock__open_and_load"))
+ return;
+
+ if (skel->data->test_skip == 2) {
+ test__skip();
+ goto end;
+ }
+ skel->bss->cs_count = size;
+ skel->bss->limit = repeat * nthreads;
+
+ ASSERT_OK(pthread_barrier_init(&barrier, NULL, nthreads), "barrier init");
+
+ prog_fd = bpf_program__fd(skel->progs.prog);
+ for (i = 0; i < nthreads; i++) {
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end_barrier;
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end_barrier;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end_barrier;
+ }
+
+ if (skel->data->test_skip == 3) {
+ printf("%s:SKIP: CONFIG_NR_CPUS exceed the maximum supported by arena spinlock\n",
+ __func__);
+ test__skip();
+ goto end_barrier;
+ }
+
+ ASSERT_EQ(skel->bss->counter, repeat * nthreads, "check counter value");
+
+end_barrier:
+ pthread_barrier_destroy(&barrier);
+end:
+ arena_spin_lock__destroy(skel);
+ return;
+}
+
+void test_arena_spin_lock(void)
+{
+ repeat = 1000;
+ if (test__start_subtest("arena_spin_lock_1"))
+ test_arena_spin_lock_size(1);
+ cpu = 0;
+ if (test__start_subtest("arena_spin_lock_1000"))
+ test_arena_spin_lock_size(1000);
+ cpu = 0;
+ repeat = 100;
+ if (test__start_subtest("arena_spin_lock_50000"))
+ test_arena_spin_lock_size(50000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
new file mode 100644
index 000000000000..f81a0c066505
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_strsearch.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_strsearch.skel.h"
+
+static void test_arena_str(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_strsearch *skel;
+ int ret;
+
+ skel = arena_strsearch__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_strsearch__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_strsearch), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ }
+ arena_strsearch__destroy(skel);
+}
+
+void test_arena_strsearch(void)
+{
+ if (test__start_subtest("arena_strsearch"))
+ test_arena_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
index bb143de68875..e27d66b75fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -144,11 +144,17 @@ static void test_parse_test_list_file(void)
if (!ASSERT_OK(ferror(fp), "prepare tmp"))
goto out_fclose;
+ if (!ASSERT_OK(fsync(fileno(fp)), "fsync tmp"))
+ goto out_fclose;
+
init_test_filter_set(&set);
- ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+ if (!ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file"))
+ goto out_fclose;
+
+ if (!ASSERT_EQ(set.cnt, 4, "test count"))
+ goto out_free_set;
- ASSERT_EQ(set.cnt, 4, "test count");
ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
@@ -158,8 +164,8 @@ static void test_parse_test_list_file(void)
ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+out_free_set:
free_test_filter_set(&set);
-
out_fclose:
fclose(fp);
out_remove:
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index 13e101f370a1..92b5f378bfb8 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -165,11 +165,17 @@ static void test_xchg(struct atomics_lskel *skel)
void test_atomics(void)
{
struct atomics_lskel *skel;
+ int err;
- skel = atomics_lskel__open_and_load();
- if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
+ skel = atomics_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton open"))
return;
+ skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = atomics_lskel__load(skel);
+ if (!ASSERT_OK(err, "atomics skeleton load"))
+ goto cleanup;
+
if (skel->data->skip_tests) {
printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
__func__);
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 329c7862b52d..9e77e5da7097 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -3,6 +3,7 @@
#include "test_attach_kprobe_sleepable.skel.h"
#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
+#include "kprobe_write_ctx.skel.h"
/* this is how USDT semaphore is actually defined, except volatile modifier */
volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
@@ -122,6 +123,110 @@ cleanup:
test_attach_probe_manual__destroy(skel);
}
+/* attach uprobe/uretprobe long event name tests */
+static void test_attach_uprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
+ char path[PATH_MAX] = {0};
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ if (!ASSERT_GT(readlink("/proc/self/exe", path, PATH_MAX - 1), 0, "readlink"))
+ goto cleanup;
+
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ path,
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ path,
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_uretprobe = uretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+/* attach kprobe/kretprobe long event name tests */
+static void test_attach_kprobe_long_event_name(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
+ struct bpf_link *kprobe_link, *kretprobe_link;
+ struct test_attach_probe_manual *skel;
+
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
+ return;
+
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = PROBE_ATTACH_MODE_LEGACY;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kprobe = kprobe_link;
+
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ "bpf_testmod_looooooooooooooooooooooooooooooong_name",
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe_long_event_name"))
+ goto cleanup;
+ skel->links.handle_kretprobe = kretprobe_link;
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+#ifdef __x86_64__
+/* attach a kprobe program that writes to its context; the attach is expected to fail */
+static void test_attach_kprobe_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_kprobe_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
static void test_attach_probe_auto(struct test_attach_probe *skel)
{
struct bpf_link *uprobe_err_link;
@@ -323,6 +428,13 @@ void test_attach_probe(void)
if (test__start_subtest("uprobe-ref_ctr"))
test_uprobe_ref_ctr(skel);
+ if (test__start_subtest("uprobe-long_name"))
+ test_attach_uprobe_long_event_name();
+ if (test__start_subtest("kprobe-long_name"))
+ test_attach_kprobe_long_event_name();
+ if (test__start_subtest("kprobe-write-ctx"))
+ test_attach_kprobe_write_ctx();
+
cleanup:
test_attach_probe__destroy(skel);
ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
index cc184e4420f6..42b49870e520 100644
--- a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -6,10 +6,14 @@
#include <test_progs.h>
#include "bloom_filter_map.skel.h"
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
static void test_fail_cases(void)
{
LIBBPF_OPTS(bpf_map_create_opts, opts);
- __u32 value;
+ __u32 value = 0;
int fd, err;
/* Invalid key size */
@@ -69,6 +73,7 @@ static void test_success_cases(void)
/* Create a map */
opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ opts.numa_node = NUMA_NO_NODE;
fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 6befa870434b..75f4dff7d042 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -450,8 +450,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.size = sizeof(attr);
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
- attr.freq = 1;
- attr.sample_freq = 10000;
+ attr.sample_period = 100000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
@@ -489,10 +488,28 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_tracing_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_TRACING, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.tracing.cookie, cookie, "tracing_cookie");
+
+ return 0;
+}
+
static void tracing_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd;
+ int prog_fd, err;
int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
LIBBPF_OPTS(bpf_test_run_opts, opts);
LIBBPF_OPTS(bpf_link_create_opts, link_opts);
@@ -507,6 +524,10 @@ static void tracing_subtest(struct test_bpf_cookie *skel)
if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
goto cleanup;
+ err = verify_tracing_link_info(fentry_fd, cookie);
+ if (!ASSERT_OK(err, "verify_tracing_link_info"))
+ goto cleanup;
+
cookie = 0x20000000000000L;
prog_fd = bpf_program__fd(skel->progs.fexit_test1);
link_opts.tracing.cookie = cookie;
@@ -635,10 +656,29 @@ cleanup:
bpf_link__destroy(link);
}
+static int verify_raw_tp_link_info(int fd, u64 cookie)
+{
+ struct bpf_link_info info;
+ int err;
+ u32 len = sizeof(info);
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type"))
+ return -1;
+
+ ASSERT_EQ(info.raw_tracepoint.cookie, cookie, "raw_tp_cookie");
+
+ return 0;
+}
+
static void raw_tp_subtest(struct test_bpf_cookie *skel)
{
__u64 cookie;
- int prog_fd, link_fd = -1;
+ int err, prog_fd, link_fd = -1;
struct bpf_link *link = NULL;
LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
@@ -656,6 +696,11 @@ static void raw_tp_subtest(struct test_bpf_cookie *skel)
goto cleanup;
usleep(1); /* trigger */
+
+ err = verify_raw_tp_link_info(link_fd, cookie);
+ if (!ASSERT_OK(err, "verify_raw_tp_link_info"))
+ goto cleanup;
+
close(link_fd); /* detach */
link_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
new file mode 100644
index 000000000000..d138cc7b1bda
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_gotox.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+
+#include <sys/syscall.h>
+#include <bpf/bpf.h>
+
+#include "bpf_gotox.skel.h"
+
+static void __test_run(struct bpf_program *prog, void *ctx_in, size_t ctx_size_in)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .ctx_in = ctx_in,
+ .ctx_size_in = ctx_size_in,
+ );
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+}
+
+static void __subtest(struct bpf_gotox *skel, void (*check)(struct bpf_gotox *))
+{
+ if (skel->data->skip)
+ test__skip();
+ else
+ check(skel);
+}
+
+static void check_simple(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->ret_user = 0;
+
+ __test_run(prog, &ctx_in, sizeof(ctx_in));
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+static void check_simple_fentry(struct bpf_gotox *skel,
+ struct bpf_program *prog,
+ __u64 ctx_in,
+ __u64 expected)
+{
+ skel->bss->in_user = ctx_in;
+ skel->bss->ret_user = 0;
+
+ /* trigger */
+ usleep(1);
+
+ if (!ASSERT_EQ(skel->bss->ret_user, expected, "skel->bss->ret_user"))
+ return;
+}
+
+/* validate that for two loads of the same jump table libbpf generates only one map */
+static void check_one_map_two_jumps(struct bpf_gotox *skel)
+{
+ struct bpf_prog_info prog_info;
+ struct bpf_map_info map_info;
+ __u32 len;
+ __u32 map_ids[16];
+ int prog_fd, map_fd;
+ int ret;
+ int i;
+ bool seen = false;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.map_ids = (long)map_ids;
+ prog_info.nr_map_ids = ARRAY_SIZE(map_ids);
+ prog_fd = bpf_program__fd(skel->progs.one_map_two_jumps);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd(one_map_two_jumps)"))
+ return;
+
+ len = sizeof(prog_info);
+ ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(prog_fd)"))
+ return;
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ map_fd = bpf_map_get_fd_by_id(map_ids[i]);
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
+ return;
+
+ len = sizeof(map_info);
+ memset(&map_info, 0, len);
+ ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &len);
+ if (!ASSERT_OK(ret, "bpf_obj_get_info_by_fd(map_fd)")) {
+ close(map_fd);
+ return;
+ }
+
+ if (map_info.type == BPF_MAP_TYPE_INSN_ARRAY) {
+ if (!ASSERT_EQ(seen, false, "more than one INSN_ARRAY map")) {
+ close(map_fd);
+ return;
+ }
+ seen = true;
+ }
+ close(map_fd);
+ }
+
+ ASSERT_EQ(seen, true, "no INSN_ARRAY map");
+}
+
+static void check_one_switch(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch, in[i], out[i]);
+}
+
+static void check_one_switch_non_zero_sec_off(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_switch_non_zero_sec_off, in[i], out[i]);
+}
+
+static void check_two_switches(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {103, 104, 107, 205, 115, 1019, 1019};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.two_switches, in[i], out[i]);
+}
+
+static void check_big_jump_table(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 11, 27, 31, 22, 45, 99};
+ __u64 out[] = {2, 3, 4, 5, 19, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.big_jump_table, in[i], out[i]);
+}
+
+static void check_one_jump_two_maps(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {12, 15, 7, 15, 12, 15, 15};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.one_jump_two_maps, in[i], out[i]);
+}
+
+static void check_static_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global1, in[i], out[i]);
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_static_global2, in[i], out[i]);
+}
+
+static void check_nonstatic_global(struct bpf_gotox *skel)
+{
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global1, in[i], out[i]);
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple(skel, skel->progs.use_nonstatic_global2, in[i], out[i]);
+}
+
+static void check_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.simple_test_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.simple_test_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_static_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_static_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_static_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+static void check_nonstatic_global_other_sec(struct bpf_gotox *skel)
+{
+ struct bpf_link *link;
+ __u64 in[] = {0, 1, 2, 3, 4, 5, 77};
+ __u64 out[] = {2, 3, 4, 5, 7, 19, 19};
+ int i;
+
+ link = bpf_program__attach(skel->progs.use_nonstatic_global_other_sec);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(in); i++)
+ check_simple_fentry(skel, skel->progs.use_nonstatic_global_other_sec, in[i], out[i]);
+
+ bpf_link__destroy(link);
+}
+
+void test_bpf_gotox(void)
+{
+ struct bpf_gotox *skel;
+ int ret;
+
+ skel = bpf_gotox__open();
+ if (!ASSERT_NEQ(skel, NULL, "bpf_gotox__open"))
+ return;
+
+ ret = bpf_gotox__load(skel);
+ if (!ASSERT_OK(ret, "bpf_gotox__load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("one-switch"))
+ __subtest(skel, check_one_switch);
+
+ if (test__start_subtest("one-switch-non-zero-sec-offset"))
+ __subtest(skel, check_one_switch_non_zero_sec_off);
+
+ if (test__start_subtest("two-switches"))
+ __subtest(skel, check_two_switches);
+
+ if (test__start_subtest("big-jump-table"))
+ __subtest(skel, check_big_jump_table);
+
+ if (test__start_subtest("static-global"))
+ __subtest(skel, check_static_global);
+
+ if (test__start_subtest("nonstatic-global"))
+ __subtest(skel, check_nonstatic_global);
+
+ if (test__start_subtest("other-sec"))
+ __subtest(skel, check_other_sec);
+
+ if (test__start_subtest("static-global-other-sec"))
+ __subtest(skel, check_static_global_other_sec);
+
+ if (test__start_subtest("nonstatic-global-other-sec"))
+ __subtest(skel, check_nonstatic_global_other_sec);
+
+ if (test__start_subtest("one-jump-two-maps"))
+ __subtest(skel, check_one_jump_two_maps);
+
+ if (test__start_subtest("one-map-two-jumps"))
+ __subtest(skel, check_one_map_two_jumps);
+
+ bpf_gotox__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
new file mode 100644
index 000000000000..269870bec941
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_insn_array.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <bpf/bpf.h>
+#include <test_progs.h>
+
+#ifdef __x86_64__
+static int map_create(__u32 map_type, __u32 max_entries)
+{
+ const char *map_name = "insn_array";
+ __u32 key_size = 4;
+ __u32 value_size = sizeof(struct bpf_insn_array_value);
+
+ return bpf_map_create(map_type, map_name, key_size, value_size, max_entries, NULL);
+}
+
+static int prog_load(struct bpf_insn *insns, __u32 insn_cnt, int *fd_array, __u32 fd_array_cnt)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.fd_array = fd_array;
+ opts.fd_array_cnt = fd_array_cnt;
+
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, &opts);
+}
+
+static void __check_success(struct bpf_insn *insns, __u32 insn_cnt, __u32 *map_in, __u32 *map_out)
+{
+ struct bpf_insn_array_value val = {};
+ int prog_fd = -1, map_fd, i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, insn_cnt);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < insn_cnt; i++) {
+ val.orig_off = map_in[i];
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, insn_cnt, &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < insn_cnt; i++) {
+ char buf[64];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(buf, sizeof(buf), "val.xlated_off should be equal map_out[%d]", i);
+ ASSERT_EQ(val.xlated_off, map_out[i], buf);
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/*
+ * Load a program that the verifier will not mangle in any way. Add an
+ * insn_array map pointing to every instruction. Check that it hasn't changed
+ * after the program load.
+ */
+static void check_one_to_one_mapping(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 2, 3, 4, 5};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Load a program with two patches (get jiffies, for simplicity). Add an
+ * insn_array map pointing to every instruction. Check how it was changed
+ * after the program load.
+ */
+static void check_simple(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, 1, 4, 5, 8, 9};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * The verifier can delete code in two cases: nops and dead code. From the
+ * insn array's point of view, the two cases are the same, so test using
+ * the simplest method: loading some nops.
+ */
+static void check_deletions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = {0, 1, 2, 3, 4, 5};
+ __u32 map_out[] = {0, -1, 1, -1, 2, 3};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Same test as check_deletions, but also add code which adds instructions
+ */
+static void check_deletions_with_functions(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0), /* nop */
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ };
+ __u32 map_in[] = { 0, 1, 2, 3, 4, 5, /* func */ 6, 7, 8, 9, 10};
+ __u32 map_out[] = {-1, 0, -1, 3, 4, 5, /* func */ -1, 6, -1, 9, 10};
+
+ __check_success(insns, ARRAY_SIZE(insns), map_in, map_out);
+}
+
+/*
+ * Try to load a program with a map which points to outside of the program
+ */
+static void check_out_of_bounds_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = ARRAY_SIZE(insns); /* too big */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+/*
+ * Try to load a program with a map which points to the middle of a 16-byte insn
+ */
+static void check_mid_insn_index(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_IMM64(BPF_REG_0, 0), /* 2 x 8 */
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd, map_fd;
+ struct bpf_insn_array_value val = {};
+ int key;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ key = 0;
+ val.orig_off = 1; /* middle of 16-byte instruction */
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &key, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)")) {
+ close(prog_fd);
+ goto cleanup;
+ }
+
+cleanup:
+ close(map_fd);
+}
+
+static void check_incorrect_index(void)
+{
+ check_out_of_bounds_index();
+ check_mid_insn_index();
+}
+
+static int set_bpf_jit_harden(char *level)
+{
+ char old_level;
+ int err = -1;
+ int fd = -1;
+
+ fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ASSERT_FAIL("open .../bpf_jit_harden returned %d (errno=%d)", fd, errno);
+ return -1;
+ }
+
+ err = read(fd, &old_level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("read from .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ err = write(fd, level, 1);
+ if (err != 1) {
+ ASSERT_FAIL("write to .../bpf_jit_harden returned %d (errno=%d)", err, errno);
+ err = -1;
+ goto end;
+ }
+
+ err = 0;
+ *level = old_level;
+end:
+ if (fd >= 0)
+ close(fd);
+ return err;
+}
+
+static void check_blindness(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ char bpf_jit_harden = '@'; /* non-existing value */
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ bpf_jit_harden = '2';
+ if (set_bpf_jit_harden(&bpf_jit_harden)) {
+ bpf_jit_harden = '@'; /* open, read or write failed => no write was done */
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ char fmt[32];
+
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ snprintf(fmt, sizeof(fmt), "val should be equal 3*%d", i);
+ ASSERT_EQ(val.xlated_off, i * 3, fmt);
+ }
+
+cleanup:
+ /* restore the old one */
+ if (bpf_jit_harden != '@')
+ set_bpf_jit_harden(&bpf_jit_harden);
+
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Once the map has been initialized, it must be frozen before a program can use it */
+static void check_load_unfrozen_map(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: now freeze the map, the program should load fine */
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+/* Map can be used only by one BPF program */
+static void check_no_map_reuse(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd, extra_fd = -1;
+ struct bpf_insn_array_value val = {};
+ int i;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, ARRAY_SIZE(insns));
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ val.orig_off = i;
+ if (!ASSERT_EQ(bpf_map_update_elem(map_fd, &i, &val, 0), 0, "bpf_map_update_elem"))
+ goto cleanup;
+ }
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(insns); i++) {
+ if (!ASSERT_EQ(bpf_map_lookup_elem(map_fd, &i, &val), 0, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ ASSERT_EQ(val.xlated_off, i, "val should be equal i");
+ }
+
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), &map_fd, 1);
+ if (!ASSERT_EQ(extra_fd, -EBUSY, "program should have been rejected (extra_fd != -EBUSY)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable without fd_array */
+ extra_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(extra_fd, 0, "bpf(BPF_PROG_LOAD): expected no error"))
+ goto cleanup;
+
+cleanup:
+ close(extra_fd);
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_no_lookup(void)
+{
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_EXIT_INSN(),
+ };
+ int prog_fd = -1, map_fd;
+
+ map_fd = map_create(BPF_MAP_TYPE_INSN_ARRAY, 1);
+ if (!ASSERT_GE(map_fd, 0, "map_create"))
+ return;
+
+ insns[0].imm = map_fd;
+
+ if (!ASSERT_EQ(bpf_map_freeze(map_fd), 0, "bpf_map_freeze"))
+ goto cleanup;
+
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "program should have been rejected (prog_fd != -EINVAL)"))
+ goto cleanup;
+
+ /* correctness: check that prog is still loadable with normal map */
+ close(map_fd);
+ map_fd = map_create(BPF_MAP_TYPE_ARRAY, 1);
+ insns[0].imm = map_fd;
+ prog_fd = prog_load(insns, ARRAY_SIZE(insns), NULL, 0);
+ if (!ASSERT_GE(prog_fd, 0, "bpf(BPF_PROG_LOAD)"))
+ goto cleanup;
+
+cleanup:
+ close(prog_fd);
+ close(map_fd);
+}
+
+static void check_bpf_side(void)
+{
+ check_bpf_no_lookup();
+}
+
+static void __test_bpf_insn_array(void)
+{
+ /* Test if offsets are adjusted properly */
+
+ if (test__start_subtest("one2one"))
+ check_one_to_one_mapping();
+
+ if (test__start_subtest("simple"))
+ check_simple();
+
+ if (test__start_subtest("deletions"))
+ check_deletions();
+
+ if (test__start_subtest("deletions-with-functions"))
+ check_deletions_with_functions();
+
+ if (test__start_subtest("blindness"))
+ check_blindness();
+
+ /* Check all kinds of operations and related restrictions */
+
+ if (test__start_subtest("incorrect-index"))
+ check_incorrect_index();
+
+ if (test__start_subtest("load-unfrozen-map"))
+ check_load_unfrozen_map();
+
+ if (test__start_subtest("no-map-reuse"))
+ check_no_map_reuse();
+
+ if (test__start_subtest("bpf-side-ops"))
+ check_bpf_side();
+}
+#else
+static void __test_bpf_insn_array(void)
+{
+ test__skip();
+}
+#endif
+
+void test_bpf_insn_array(void)
+{
+ __test_bpf_insn_array();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 6f1bfacd7375..5225d69bf79b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -323,19 +323,87 @@ static void test_task_pidfd(void)
static void test_task_sleepable(void)
{
struct bpf_iter_tasks *skel;
+ int pid, status, err, data_pipe[2], finish_pipe[2], c = 0;
+ char *test_data = NULL;
+ char *test_data_long = NULL;
+ char *data[2];
+
+ if (!ASSERT_OK(pipe(data_pipe), "data_pipe") ||
+ !ASSERT_OK(pipe(finish_pipe), "finish_pipe"))
+ return;
skel = bpf_iter_tasks__open_and_load();
if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
return;
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ return;
+
+ if (pid == 0) {
+ /* child */
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
+
+ test_data = malloc(sizeof(char) * 10);
+ strncpy(test_data, "test_data", 10);
+ test_data[9] = '\0';
+
+ test_data_long = malloc(sizeof(char) * 5000);
+ for (int i = 0; i < 5000; ++i) {
+ if (i % 2 == 0)
+ test_data_long[i] = 'b';
+ else
+ test_data_long[i] = 'a';
+ }
+ test_data_long[4999] = '\0';
+
+ data[0] = test_data;
+ data[1] = test_data_long;
+
+ write(data_pipe[1], &data, sizeof(data));
+
+ /* keep child alive until after the test */
+ err = read(finish_pipe[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+ _exit(0);
+ }
+
+ /* parent */
+ close(data_pipe[1]);
+ close(finish_pipe[0]);
+
+ err = read(data_pipe[0], &data, sizeof(data));
+ ASSERT_EQ(err, sizeof(data), "read_check");
+
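+	/* Hand the child's user-space pointers and pid to the iterator
+	 * program so it can read the strings from the child's memory.
+	 */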
+ skel->bss->user_ptr = data[0];
+ skel->bss->user_ptr_long = data[1];
+ skel->bss->pid = pid;
+
do_dummy_read(skel->progs.dump_task_sleepable);
ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
"num_expected_failure_copy_from_user_task");
ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
"num_success_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task_str, 0,
+ "num_expected_failure_copy_from_user_task_str");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task_str, 0,
+ "num_success_copy_from_user_task_str");
bpf_iter_tasks__destroy(skel);
+
+ write(finish_pipe[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+ ASSERT_EQ(status, 0, "zero_child_exit");
+
+ close(data_pipe[0]);
+ close(finish_pipe[1]);
}
static void test_task_stack(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
index fe2c502e5089..ecc3d47919ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -78,7 +78,7 @@ static int test_setup_uffd(void *fault_addr)
}
uffd_register.range.start = (unsigned long)fault_addr;
- uffd_register.range.len = 4096;
+ uffd_register.range.len = getpagesize();
uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
close(uffd);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index a4a1f93878d4..dd6512fa652b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -63,6 +63,12 @@ static void test_bpf_nf_ct(int mode)
.repeat = 1,
);
+ if (SYS_NOFAIL("iptables-legacy --version")) {
+ fprintf(stdout, "Missing required iptables-legacy tool\n");
+ test__skip();
+ return;
+ }
+
skel = test_bpf_nf__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
return;
@@ -72,11 +78,14 @@ static void test_bpf_nf_ct(int mode)
if (!ASSERT_OK(system(cmd), cmd))
goto end;
- srv_port = (mode == TEST_XDP) ? 5005 : 5006;
- srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", 0, TIMEOUT_MS);
if (!ASSERT_GE(srv_fd, 0, "start_server"))
goto end;
+ srv_port = get_socket_local_port(srv_fd);
+ if (!ASSERT_GE(srv_port, 0, "get_sock_local_port"))
+ goto end;
+
client_fd = connect_to_server(srv_fd);
if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto end;
@@ -91,7 +100,7 @@ static void test_bpf_nf_ct(int mode)
skel->bss->saddr = peer_addr.sin_addr.s_addr;
skel->bss->sport = peer_addr.sin_port;
skel->bss->daddr = peer_addr.sin_addr.s_addr;
- skel->bss->dport = htons(srv_port);
+ skel->bss->dport = srv_port;
if (mode == TEST_XDP)
prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_ROOT,
+ .handle = 0x8000000,
+ .qdisc = qdisc);
+ int srv_fd = -1, cli_fd = -1;
+ int err;
+
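+	/* Install the BPF qdisc as the root qdisc on loopback, then push
+	 * TCP traffic through it and verify every byte arrives.
+	 */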
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(err, "attach qdisc"))
+ return;
+
+ srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_OK_FD(srv_fd, "start server"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+ goto done;
+
+ err = send_recv_data(srv_fd, cli_fd, total_bytes);
+ ASSERT_OK(err, "send_recv_data");
+
+done:
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+
+ bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+ struct bpf_qdisc_fifo *fifo_skel;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ do_test("bpf_fifo");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+ struct bpf_qdisc_fq *fq_skel;
+
+ fq_skel = bpf_qdisc_fq__open_and_load();
+ if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+ goto out;
+
+ do_test("bpf_fq");
+out:
+ bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
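+	/* Grafting a BPF qdisc under an mq parent is expected to succeed. */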
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ hook.ifindex = if_nametoindex("veth0");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ err = bpf_tc_hook_create(&hook);
+ ASSERT_OK(err, "attach qdisc");
+
+ bpf_tc_hook_destroy(&hook);
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
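+	/* Grafting a BPF qdisc anywhere other than root (here, under an
+	 * htb class) is expected to fail.
+	 */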
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
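+	/* Writable, shared and oversized mappings of the sysfs BTF file
+	 * must all fail; only a read-only private mapping of the
+	 * page-aligned size is expected to succeed.
+	 */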
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "tc qdisc add dev lo root handle 1: htb");
+ SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_ERR(err, "attach qdisc"))
+ bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+ SYS(out, "tc qdisc delete dev lo root htb");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+ struct bpf_qdisc_fail__incompl_ops *skel;
+ struct bpf_link *link;
+
+ skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fail__incompl_ops__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.test);
+ if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+ bpf_link__destroy(link);
+
+ bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+ FILE *f;
+ int num;
+
+ f = fopen("/proc/sys/net/core/default_qdisc", "r");
+ if (!f)
+ return -errno;
+
+ num = fscanf(f, "%s", qdisc_name);
+ fclose(f);
+
+ return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+ char default_qdisc[IFNAMSIZ] = {};
+ struct bpf_qdisc_fifo *fifo_skel;
+ struct netns_obj *netns = NULL;
+ int err;
+
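+	/* Make bpf_fifo the default qdisc, then check that mq instantiates
+	 * it for its child queues in a fresh netns (init_called is set).
+	 */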
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ err = get_default_qdisc(default_qdisc);
+ if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+ goto out;
+
+ err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+ if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+ goto out;
+
+ netns = netns_new("bpf_qdisc_ns", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ netns_free(netns);
+ if (default_qdisc[0])
+ write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+ if (test__start_subtest("fifo"))
+ test_fifo();
+ if (test__start_subtest("fq"))
+ test_fq();
+ if (test__start_subtest("attach to mq"))
+ test_qdisc_attach_to_mq();
+ if (test__start_subtest("attach to non root"))
+ test_qdisc_attach_to_non_root();
+ if (test__start_subtest("incompl_ops"))
+ test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+ test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index e63d74ce046f..054ecb6b1e9f 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -3866,11 +3866,11 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "vlen != 0",
},
{
- .descr = "decl_tag test #8, invalid kflag",
+ .descr = "decl_tag test #8, tag with kflag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
- BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1),
+ BTF_DECL_ATTR_ENC(NAME_TBD, 2, -1),
BTF_END_RAW,
},
BTF_STR_SEC("\0local\0tag1"),
@@ -3881,8 +3881,6 @@ static struct btf_raw_test raw_tests[] = {
.key_type_id = 1,
.value_type_id = 1,
.max_entries = 1,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
},
{
.descr = "decl_tag test #9, var, invalid component_idx",
@@ -4207,6 +4205,23 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Type tags don't precede modifiers",
},
{
+ .descr = "type_tag test #7, tag with kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ATTR_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
.descr = "enum64 test #1, unsigned, size 8",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -7481,6 +7496,71 @@ static struct btf_dedup_test dedup_tests[] = {
},
},
{
+ .descr = "dedup: recursive typedef",
+ /*
+ * This test simulates a recursive typedef, which in GO is defined as such:
+ *
+ * type Foo func() Foo
+ *
+ * In BTF terms, this is represented as a TYPEDEF referencing
+ * a FUNC_PROTO that returns the same TYPEDEF.
+ */
+ .input = {
+ .raw_types = {
+ /*
+ * [1] typedef Foo -> func() Foo
+ * [2] func_proto() -> Foo
+ * [3] typedef Foo -> func() Foo
+ * [4] func_proto() -> Foo
+ */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 4), /* [3] */
+ BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 2), /* [1] */
+ BTF_FUNC_PROTO_ENC(1, 0), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0Foo"),
+ },
+},
+{
+ .descr = "dedup: typedef",
+ /*
+ * // CU 1:
+ * typedef int foo;
+ *
+ * // CU 2:
+ * typedef int foo;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ /* CU 2 */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo"),
+ },
+},
+{
.descr = "dedup: typedef tags",
.input = {
.raw_types = {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
index d9024c7a892a..5bc15bb6b7ce 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -440,6 +440,105 @@ cleanup:
btf__free(btf1);
}
+/* Ensure module split BTF dedup worked correctly; when dedup goes badly
+ * wrong, core kernel types end up in split BTF as well, so ensure that
+ * references to such types point at base - not split - BTF.
+ *
+ * bpf_testmod_test_write() has multiple core kernel type parameters:
+ *
+ * ssize_t
+ * bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ * struct bin_attribute *bin_attr,
+ * char *buf, loff_t off, size_t len);
+ *
+ * Ensure each of the FUNC_PROTO params is a core kernel type.
+ *
+ * Do the same for
+ *
+ * __bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk);
+ *
+ * ...and
+ *
+ * __bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb);
+ *
+ */
+const char *mod_funcs[] = {
+ "bpf_testmod_test_write",
+ "bpf_kfunc_call_test3",
+ "bpf_kfunc_call_test_pass_ctx"
+};
+
+static void test_split_module(void)
+{
+ struct btf *vmlinux_btf, *btf1 = NULL;
+ int i, nr_base_types;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux_btf"))
+ return;
+ nr_base_types = btf__type_cnt(vmlinux_btf);
+ if (!ASSERT_GT(nr_base_types, 0, "nr_base_types"))
+ goto cleanup;
+
+ btf1 = btf__parse_split("/sys/kernel/btf/bpf_testmod", vmlinux_btf);
+ if (!ASSERT_OK_PTR(btf1, "split_btf"))
+		goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(mod_funcs); i++) {
+ const struct btf_param *p;
+ const struct btf_type *t;
+ __u16 vlen;
+ __u32 id;
+ int j;
+
+ id = btf__find_by_name_kind(btf1, mod_funcs[i], BTF_KIND_FUNC);
+ if (!ASSERT_GE(id, nr_base_types, "func_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "func_id_type"))
+ goto cleanup;
+ t = btf__type_by_id(btf1, t->type);
+ if (!ASSERT_OK_PTR(t, "func_proto_id_type"))
+ goto cleanup;
+ if (!ASSERT_EQ(btf_is_func_proto(t), true, "is_func_proto"))
+ goto cleanup;
+ vlen = btf_vlen(t);
+
+ for (j = 0, p = btf_params(t); j < vlen; j++, p++) {
+ /* bpf_testmod uses resilient split BTF, so any
+ * reference types will be added to split BTF and their
+ * associated targets will be base BTF types; for example
+ * for a "struct sock *" the PTR will be in split BTF
+ * while the "struct sock" will be in base.
+ *
+ * In some cases like loff_t we have to resolve
+ * multiple typedefs hence the while() loop below.
+ *
+ * Note that resilient split BTF generation depends
+ * on pahole version, so we do not assert that
+ * reference types are in split BTF, as if pahole
+ * does not support resilient split BTF they will
+ * also be base BTF types.
+ */
+ id = p->type;
+ do {
+ t = btf__type_by_id(btf1, id);
+ if (!ASSERT_OK_PTR(t, "param_ref_type"))
+ goto cleanup;
+ if (!btf_is_mod(t) && !btf_is_ptr(t) && !btf_is_typedef(t))
+ break;
+ id = t->type;
+ } while (true);
+
+ if (!ASSERT_LT(id, nr_base_types, "verify_base_type"))
+ goto cleanup;
+ }
+ }
+cleanup:
+ btf__free(btf1);
+ btf__free(vmlinux_btf);
+}
+
void test_btf_dedup_split()
{
if (test__start_subtest("split_simple"))
@@ -450,4 +549,6 @@ void test_btf_dedup_split()
test_split_fwd_resolve();
if (test__start_subtest("split_dup_struct_in_cu"))
test_split_dup_struct_in_cu();
+ if (test__start_subtest("split_module"))
+ test_split_module();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
index ca84726d5ac1..fb67ae195a73 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_distill.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
@@ -385,7 +385,7 @@ static void test_distilled_base_missing_err(void)
"[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
btf5 = btf__new_empty();
if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
- return;
+ goto cleanup;
btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
VALIDATE_RAW_BTF(
btf5,
@@ -478,7 +478,7 @@ static void test_distilled_base_multi_err2(void)
"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
btf5 = btf__new_empty();
if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
- return;
+ goto cleanup;
btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */
VALIDATE_RAW_BTF(
@@ -601,6 +601,76 @@ cleanup:
btf__free(base);
}
+/* If a needed composite type, which is a member of a composite type
+ * in the split BTF, has a different size in the base BTF we wish to
+ * relocate with, btf__relocate() should error out.
+ */
+static void test_distilled_base_embedded_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_struct(btf1, "s1", 4); /* [2] struct s1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_struct(btf2, "with_embedded", 8); /* [3] struct with_embedded { */
+ btf__add_field(btf2, "e1", 2, 0, 0); /* struct s1 e1; */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[3] STRUCT 'with_embedded' size=8 vlen=1\n"
+ "\t'e1' type_id=2 bits_offset=0");
+
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] STRUCT 's1' size=4 vlen=0",
+ "[2] STRUCT 'with_embedded' size=8 vlen=1\n"
+ "\t'e1' type_id=1 bits_offset=0");
+
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ goto cleanup;
+
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ /* struct with the same name but different size */
+ btf__add_struct(btf5, "s1", 8); /* [2] struct s1 { */
+ btf__add_field(btf5, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
void test_btf_distill(void)
{
if (test__start_subtest("distilled_base"))
@@ -613,6 +683,8 @@ void test_btf_distill(void)
test_distilled_base_multi_err();
if (test__start_subtest("distilled_base_multi_err2"))
test_distilled_base_multi_err2();
+ if (test__start_subtest("distilled_base_embedded_err"))
+ test_distilled_base_embedded_err();
if (test__start_subtest("distilled_base_vmlinux"))
test_distilled_base_vmlinux();
if (test__start_subtest("distilled_endianness"))
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index b293b8501fd6..10cba526d3e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -63,7 +63,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
/* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
* so it's impossible to determine correct pointer size; but if they
- * do, it should be 8 regardless of host architecture, becaues BPF
+ * do, it should be 8 regardless of host architecture, because BPF
* target is always 64-bit
*/
if (!t->known_ptr_sz) {
@@ -126,26 +126,69 @@ done:
return err;
}
-static char *dump_buf;
-static size_t dump_buf_sz;
-static FILE *dump_buf_file;
+struct test_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *dump_buf;
+ size_t dump_buf_sz;
+ FILE *dump_buf_file;
+};
-static void test_btf_dump_incremental(void)
+static void test_ctx__free(struct test_ctx *t)
{
- struct btf *btf = NULL;
- struct btf_dump *d = NULL;
- int id, err, i;
+ fclose(t->dump_buf_file);
+ free(t->dump_buf);
+ btf_dump__free(t->d);
+ btf__free(t->btf);
+}
- dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
- if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
- return;
- btf = btf__new_empty();
- if (!ASSERT_OK_PTR(btf, "new_empty"))
+static int test_ctx__init(struct test_ctx *t)
+{
+ t->dump_buf_file = open_memstream(&t->dump_buf, &t->dump_buf_sz);
+ if (!ASSERT_OK_PTR(t->dump_buf_file, "dump_memstream"))
+ return -1;
+ t->btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(t->btf, "new_empty"))
goto err_out;
- d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL);
- if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ t->d = btf_dump__new(t->btf, btf_dump_printf, t->dump_buf_file, NULL);
+ if (!ASSERT_OK(libbpf_get_error(t->d), "btf_dump__new"))
goto err_out;
+ return 0;
+
+err_out:
+ test_ctx__free(t);
+ return -1;
+}
+
+static void test_ctx__dump_and_compare(struct test_ctx *t,
+ const char *expected_output,
+ const char *message)
+{
+ int i, err;
+
+ for (i = 1; i < btf__type_cnt(t->btf); i++) {
+ err = btf_dump__dump_type(t->d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(t->dump_buf_file);
+ t->dump_buf[t->dump_buf_sz] = 0; /* some libc implementations don't do this */
+
+ ASSERT_STREQ(t->dump_buf, expected_output, message);
+}
+
+static void test_btf_dump_incremental(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
/* First, generate BTF corresponding to the following C code:
*
* enum x;
@@ -182,15 +225,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "x", 4, 0, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
-
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"enum x;\n"
"\n"
"enum x {\n"
@@ -221,7 +256,7 @@ static void test_btf_dump_incremental(void)
* enum values don't conflict;
*
*/
- fseek(dump_buf_file, 0, SEEK_SET);
+ fseek(t.dump_buf_file, 0, SEEK_SET);
id = btf__add_struct(btf, "s", 4);
ASSERT_EQ(id, 7, "struct_id");
@@ -232,14 +267,7 @@ static void test_btf_dump_incremental(void)
err = btf__add_field(btf, "s", 6, 64, 0);
ASSERT_OK(err, "field_ok");
- for (i = 1; i < btf__type_cnt(btf); i++) {
- err = btf_dump__dump_type(d, i);
- ASSERT_OK(err, "dump_type_ok");
- }
-
- fflush(dump_buf_file);
- dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
- ASSERT_STREQ(dump_buf,
+ test_ctx__dump_and_compare(&t,
"struct s___2 {\n"
" enum x x;\n"
" enum {\n"
@@ -248,11 +276,53 @@ static void test_btf_dump_incremental(void)
" struct s s;\n"
"};\n\n" , "c_dump1");
-err_out:
- fclose(dump_buf_file);
- free(dump_buf);
- btf_dump__free(d);
- btf__free(btf);
+ test_ctx__free(&t);
+}
+
+static void test_btf_dump_type_tags(void)
+{
+ struct test_ctx t = {};
+ struct btf *btf;
+ int id, err;
+
+ if (test_ctx__init(&t))
+ return;
+
+ btf = t.btf;
+
+ /* Generate BTF corresponding to the following C code:
+ *
+ * struct s {
+ * void __attribute__((btf_type_tag(\"void_tag\"))) *p1;
+ * void __attribute__((void_attr)) *p2;
+ * };
+ *
+ */
+
+ id = btf__add_type_tag(btf, "void_tag", 0);
+ ASSERT_EQ(id, 1, "type_tag_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 2, "void_ptr_id1");
+
+ id = btf__add_type_attr(btf, "void_attr", 0);
+ ASSERT_EQ(id, 3, "type_attr_id");
+ id = btf__add_ptr(btf, id);
+ ASSERT_EQ(id, 4, "void_ptr_id2");
+
+ id = btf__add_struct(btf, "s", 8);
+ ASSERT_EQ(id, 5, "struct_id");
+ err = btf__add_field(btf, "p1", 2, 0, 0);
+ ASSERT_OK(err, "field_ok1");
+ err = btf__add_field(btf, "p2", 4, 0, 0);
+ ASSERT_OK(err, "field_ok2");
+
+ test_ctx__dump_and_compare(&t,
+"struct s {\n"
+" void __attribute__((btf_type_tag(\"void_tag\"))) *p1;\n"
+" void __attribute__((void_attr)) *p2;\n"
+"};\n\n", "dump_and_compare");
+
+ test_ctx__free(&t);
}
#define STRSIZE 4096
@@ -809,6 +879,122 @@ static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
"static int bpf_cgrp_storage_busy = (int)2", 2);
}
+struct btf_dump_string_ctx {
+ struct btf *btf;
+ struct btf_dump *d;
+ char *str;
+ struct btf_dump_type_data_opts *opts;
+ int array_id;
+};
+
+static int btf_dump_one_string(struct btf_dump_string_ctx *ctx,
+ char *ptr, size_t ptr_sz,
+ const char *expected_val)
+{
+ size_t type_sz;
+ int ret;
+
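+	/* Dump ptr as the test's char[4] array type and compare the
+	 * rendered output against expected_val.
+	 */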
+ ctx->str[0] = '\0';
+ type_sz = btf__resolve_size(ctx->btf, ctx->array_id);
+ ret = btf_dump__dump_type_data(ctx->d, ctx->array_id, ptr, ptr_sz, ctx->opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(ctx->str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+static void btf_dump_strings(struct btf_dump_string_ctx *ctx)
+{
+ struct btf_dump_type_data_opts *opts = ctx->opts;
+
+ opts->emit_strings = true;
+
+ opts->compact = true;
+ opts->emit_zeroes = false;
+
+ opts->skip_names = false;
+ btf_dump_one_string(ctx, "foo", 4, "(char[4])\"foo\"");
+
+ opts->skip_names = true;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->emit_zeroes = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* This should have no effect. */
+ opts->compact = false;
+ btf_dump_one_string(ctx, "foo", 4, "\"foo\"");
+
+ /* Non-printable characters come out as hex. */
+ btf_dump_one_string(ctx, "fo\xff", 4, "\"fo\\xff\"");
+ btf_dump_one_string(ctx, "fo\x7", 4, "\"fo\\x07\"");
+
+ /*
+ * Strings that are too long for the specified type ("char[4]")
+ * should fall back to the current behavior.
+ */
+ opts->compact = true;
+ btf_dump_one_string(ctx, "abcde", 6, "['a','b','c','d',]");
+
+ /*
+ * Strings that are too short for the specified type ("char[4]")
+ * should work normally.
+ */
+ btf_dump_one_string(ctx, "ab", 3, "\"ab\"");
+
+ /* Non-NUL-terminated arrays don't get printed as strings. */
+ char food[4] = { 'f', 'o', 'o', 'd' };
+ char bye[3] = { 'b', 'y', 'e' };
+
+ btf_dump_one_string(ctx, food, 4, "['f','o','o','d',]");
+ btf_dump_one_string(ctx, bye, 3, "['b','y','e',]");
+
+ /* The embedded NUL should terminate the string. */
+ char embed[4] = { 'f', 'o', '\0', 'd' };
+
+ btf_dump_one_string(ctx, embed, 4, "\"fo\"");
+}
+
+static void test_btf_dump_string_data(void)
+{
+ struct test_ctx t = {};
+ char str[STRSIZE];
+ struct btf_dump *d;
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ struct btf_dump_string_ctx ctx;
+ int char_id, int_id, array_id;
+
+ if (test_ctx__init(&t))
+ return;
+
+ d = btf_dump__new(t.btf, btf_dump_snprintf, str, NULL);
+	if (!ASSERT_OK_PTR(d, "could not create BTF dump")) {
+		test_ctx__free(&t);
+		return;
+	}
+
+ /* Generate BTF for a four-element char array. */
+ char_id = btf__add_int(t.btf, "char", 1, BTF_INT_CHAR);
+ ASSERT_EQ(char_id, 1, "char_id");
+ int_id = btf__add_int(t.btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(int_id, 2, "int_id");
+ array_id = btf__add_array(t.btf, int_id, char_id, 4);
+ ASSERT_EQ(array_id, 3, "array_id");
+
+ ctx.btf = t.btf;
+ ctx.d = d;
+ ctx.str = str;
+ ctx.opts = &opts;
+ ctx.array_id = array_id;
+
+ btf_dump_strings(&ctx);
+
+ btf_dump__free(d);
+ test_ctx__free(&t);
+}
+
static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
const char *name, const char *expected_val,
void *data, size_t data_sz)
@@ -874,6 +1060,9 @@ void test_btf_dump() {
if (test__start_subtest("btf_dump: incremental"))
test_btf_dump_incremental();
+ if (test__start_subtest("btf_dump: type_tags"))
+ test_btf_dump_type_tags();
+
btf = libbpf_find_kernel_btf();
if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
return;
@@ -897,6 +1086,8 @@ void test_btf_dump() {
test_btf_dump_struct_data(btf, d, str);
if (test__start_subtest("btf_dump: var_data"))
test_btf_dump_var_data(btf, d, str);
+ if (test__start_subtest("btf_dump: string_data"))
+ test_btf_dump_string_data();
btf_dump__free(d);
btf__free(btf);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index eef1158676ed..2d47cad50a51 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -12,10 +12,45 @@ static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
vfprintf(ctx, fmt, args);
}
-void test_btf_split() {
+/* Write raw BTF to file, return number of bytes written or negative errno */
+static ssize_t btf_raw_write(struct btf *btf, char *file)
+{
+ ssize_t written = 0;
+ const void *data;
+ __u32 size = 0;
+ int fd, ret;
+
+ fd = mkstemp(file);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return -errno;
+
+ data = btf__raw_data(btf, &size);
+ if (!ASSERT_OK_PTR(data, "btf__raw_data")) {
+ close(fd);
+ return -EINVAL;
+ }
+ while (written < size) {
+ ret = write(fd, data + written, size - written);
+ if (!ASSERT_GE(ret, 0, "write succeeded")) {
+ close(fd);
+ return -errno;
+ }
+ written += ret;
+ }
+ close(fd);
+ return written;
+}
+
+static void __test_btf_split(bool multi)
+{
+ char multisplit_btf_file[] = "/tmp/test_btf_multisplit.XXXXXX";
+ char split_btf_file[] = "/tmp/test_btf_split.XXXXXX";
+ char base_btf_file[] = "/tmp/test_btf_base.XXXXXX";
+ ssize_t multisplit_btf_sz = 0, split_btf_sz = 0, base_btf_sz = 0;
struct btf_dump *d = NULL;
- const struct btf_type *t;
- struct btf *btf1, *btf2;
+ const struct btf_type *t, *ot;
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL;
+ struct btf *btf4 = NULL, *btf5 = NULL, *btf6 = NULL;
int str_off, i, err;
btf1 = btf__new_empty();
@@ -63,14 +98,46 @@ void test_btf_split() {
ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
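+	/* Optionally stack a second split BTF on top of the first one. */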
+ if (multi) {
+ btf3 = btf__new_empty_split(btf2);
+ if (!ASSERT_OK_PTR(btf3, "multi_split_btf"))
+ goto cleanup;
+ } else {
+ btf3 = btf2;
+ }
+
+ btf__add_union(btf3, "u1", 16); /* [5] union u1 { */
+ btf__add_field(btf3, "f1", 4, 0, 0); /* struct s2 f1; */
+ btf__add_field(btf3, "uf2", 1, 0, 0); /* int f2; */
+ /* } */
+
+ if (multi) {
+ t = btf__type_by_id(btf2, 5);
+ ASSERT_NULL(t, "multisplit_type_in_first_split");
+ }
+
+ t = btf__type_by_id(btf3, 5);
+ if (!ASSERT_OK_PTR(t, "split_union_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_union(t), true, "split_union_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "split_union_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "u1", "split_union_name");
+ ASSERT_EQ(btf__type_cnt(btf3), 6, "split_type_cnt");
+
+ t = btf__type_by_id(btf3, 1);
+ if (!ASSERT_OK_PTR(t, "split_base_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "split_base_int");
+ ASSERT_STREQ(btf__str_by_offset(btf3, t->name_off), "int", "split_base_type_name");
+
/* BTF-to-C dump of split BTF */
dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
return;
- d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
+ d = btf_dump__new(btf3, btf_dump_printf, dump_buf_file, NULL);
if (!ASSERT_OK_PTR(d, "btf_dump__new"))
goto cleanup;
- for (i = 1; i < btf__type_cnt(btf2); i++) {
+ for (i = 1; i < btf__type_cnt(btf3); i++) {
err = btf_dump__dump_type(d, i);
ASSERT_OK(err, "dump_type_ok");
}
@@ -79,14 +146,56 @@ void test_btf_split() {
ASSERT_STREQ(dump_buf,
"struct s1 {\n"
" int f1;\n"
-"};\n"
-"\n"
+"};\n\n"
"struct s2 {\n"
" struct s1 f1;\n"
" int f2;\n"
" int *f3;\n"
+"};\n\n"
+"union u1 {\n"
+" struct s2 f1;\n"
+" int uf2;\n"
"};\n\n", "c_dump");
+ /* write base, split BTFs to files and ensure parsing succeeds */
+ base_btf_sz = btf_raw_write(btf1, base_btf_file);
+ if (base_btf_sz < 0)
+ goto cleanup;
+ split_btf_sz = btf_raw_write(btf2, split_btf_file);
+ if (split_btf_sz < 0)
+ goto cleanup;
+ btf4 = btf__parse(base_btf_file, NULL);
+ if (!ASSERT_OK_PTR(btf4, "parse_base"))
+ goto cleanup;
+ btf5 = btf__parse_split(split_btf_file, btf4);
+ if (!ASSERT_OK_PTR(btf5, "parse_split"))
+ goto cleanup;
+ if (multi) {
+ multisplit_btf_sz = btf_raw_write(btf3, multisplit_btf_file);
+ if (multisplit_btf_sz < 0)
+ goto cleanup;
+ btf6 = btf__parse_split(multisplit_btf_file, btf5);
+ if (!ASSERT_OK_PTR(btf6, "parse_multisplit"))
+ goto cleanup;
+ } else {
+ btf6 = btf5;
+ }
+
+ if (!ASSERT_EQ(btf__type_cnt(btf3), btf__type_cnt(btf6), "cmp_type_cnt"))
+ goto cleanup;
+
+ /* compare parsed to original BTF */
+ for (i = 1; i < btf__type_cnt(btf6); i++) {
+ t = btf__type_by_id(btf6, i);
+ if (!ASSERT_OK_PTR(t, "type_in_parsed_btf"))
+ goto cleanup;
+ ot = btf__type_by_id(btf3, i);
+ if (!ASSERT_OK_PTR(ot, "type_in_orig_btf"))
+ goto cleanup;
+ if (!ASSERT_EQ(memcmp(t, ot, sizeof(*ot)), 0, "cmp_parsed_orig_btf"))
+ goto cleanup;
+ }
+
cleanup:
if (dump_buf_file)
fclose(dump_buf_file);
@@ -94,4 +203,24 @@ cleanup:
btf_dump__free(d);
btf__free(btf1);
btf__free(btf2);
+ if (btf2 != btf3)
+ btf__free(btf3);
+ btf__free(btf4);
+ btf__free(btf5);
+ if (btf5 != btf6)
+ btf__free(btf6);
+ if (base_btf_sz > 0)
+ unlink(base_btf_file);
+ if (split_btf_sz > 0)
+ unlink(split_btf_file);
+ if (multisplit_btf_sz > 0)
+ unlink(multisplit_btf_file);
+}
+
+void test_btf_split(void)
+{
+ if (test__start_subtest("single_split"))
+ __test_btf_split(false);
+ if (test__start_subtest("multi_split"))
+ __test_btf_split(true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
new file mode 100644
index 000000000000..3923e64c4c1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_sysfs.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2025 Isovalent */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+static void test_btf_mmap_sysfs(const char *path, struct btf *base)
+{
+ struct stat st;
+ __u64 btf_size, end;
+ void *raw_data = NULL;
+ int fd = -1;
+ long page_size;
+ struct btf *btf = NULL;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ if (!ASSERT_GE(page_size, 0, "get_page_size"))
+ goto cleanup;
+
+ if (!ASSERT_OK(stat(path, &st), "stat_btf"))
+ goto cleanup;
+
+ btf_size = st.st_size;
+ end = (btf_size + page_size - 1) / page_size * page_size;
+
+ fd = open(path, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_btf"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_writable"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, btf_size, PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_shared"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end + 1, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_EQ(raw_data, MAP_FAILED, "mmap_btf_invalid_size"))
+ goto cleanup;
+
+ raw_data = mmap(NULL, end, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (!ASSERT_OK_PTR(raw_data, "mmap_btf"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_WRITE), -1,
+ "mprotect_writable"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(mprotect(raw_data, btf_size, PROT_READ | PROT_EXEC), -1,
+ "mprotect_executable"))
+ goto cleanup;
+
+ /* Check padding is zeroed */
+ for (int i = btf_size; i < end; i++) {
+ if (((__u8 *)raw_data)[i] != 0) {
+ PRINT_FAIL("tail of BTF is not zero at page offset %d\n", i);
+ goto cleanup;
+ }
+ }
+
+ btf = btf__new_split(raw_data, btf_size, base);
+ if (!ASSERT_OK_PTR(btf, "parse_btf"))
+ goto cleanup;
+
+cleanup:
+ btf__free(btf);
+ if (raw_data && raw_data != MAP_FAILED)
+ munmap(raw_data, btf_size);
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_btf_sysfs(void)
+{
+ test_btf_mmap_sysfs("/sys/kernel/btf/vmlinux", NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
new file mode 100644
index 000000000000..bb60704a3ef9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_opts.c
@@ -0,0 +1,617 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_mprog.skel.h"
+
+static void assert_mprog_count(int cg, int atype, int expected)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(cg, atype, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static void test_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+	/* optq.prog_cnt will be used in the query below */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ optd.expected_revision = 5;
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ LIBBPF_OPTS_RESET(optd);
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ LIBBPF_OPTS(bpf_cgroup_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct cgroup_mprog *skel;
+ __u32 prog_ids[10];
+ int cg, err;
+
+ cg = test__join_cgroup("/link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = id_from_link_fd(bpf_link__fd(link1)),
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd2, fd1] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd2, fd3, fd1] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "link_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ /* retrieve optq.prog_cnt */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+	/* optq.prog_cnt will be used in the query below */
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id1, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+cleanup4:
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+ /* Check revision after two detach operations */
+ err = bpf_prog_query_opts(cg, atype, &optq);
+ ASSERT_OK(err, "prog_query");
+ ASSERT_EQ(optq.revision, 7, "revision");
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_prog_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, fd4;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/preorder_prog_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_prog_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+ fd3 = bpf_program__fd(skel->progs.getsockopt_3);
+ fd4 = bpf_program__fd(skel->progs.getsockopt_4);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ err = bpf_prog_attach_opts(fd2, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, -EINVAL, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_PREORDER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ err = bpf_prog_attach_opts(fd3, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ err = bpf_prog_attach_opts(fd4, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ err = bpf_prog_detach_opts(fd4, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, cg, atype, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_preorder_link_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_cgroup_opts, opta);
+ struct bpf_link *link1, *link2, *link3, *link4;
+ struct cgroup_mprog *skel;
+ __u32 fd2;
+ int cg;
+
+ cg = test__join_cgroup("/preorder_link_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /preorder_link_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ /* ordering: [fd1] */
+ link1 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_1, cg, &opta);
+ if (!ASSERT_OK_PTR(link1, "link_attach"))
+ goto cleanup;
+
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_PREORDER,
+ .expected_revision = 2,
+ );
+
+ /* ordering: [fd1, fd2] */
+ link2 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_2, cg, &opta);
+ if (!ASSERT_OK_PTR(link2, "link_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_ERR_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_PREORDER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(link2),
+ .expected_revision = 3,
+ );
+
+ /* ordering: [fd1, fd2, fd3] */
+ link3 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_3, cg, &opta);
+ if (!ASSERT_OK_PTR(link3, "link_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(cg, atype, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ /* ordering: [fd2, fd3, fd1, fd4] */
+ link4 = bpf_program__attach_cgroup_opts(skel->progs.getsockopt_4, cg, &opta);
+ if (!ASSERT_OK_PTR(link4, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(cg, atype, 4);
+
+ bpf_link__destroy(link4);
+ assert_mprog_count(cg, atype, 3);
+
+cleanup3:
+ bpf_link__destroy(link3);
+ assert_mprog_count(cg, atype, 2);
+
+cleanup2:
+ bpf_link__destroy(link2);
+ assert_mprog_count(cg, atype, 1);
+
+cleanup1:
+ bpf_link__destroy(link1);
+ assert_mprog_count(cg, atype, 0);
+
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+static void test_invalid_attach_detach(int atype)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ __u32 fd1, fd2, id2;
+ struct cgroup_mprog *skel;
+ int cg, err;
+
+ cg = test__join_cgroup("/invalid_attach_detach");
+ if (!ASSERT_GE(cg, 0, "join_cgroup /invalid_attach_detach"))
+ return;
+
+ skel = cgroup_mprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.getsockopt_1);
+ fd2 = bpf_program__fd(skel->progs.getsockopt_2);
+
+ id2 = id_from_prog_fd(fd2);
+
+ assert_mprog_count(cg, atype, 0);
+
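+	/* Exercise attach flag and relative fd/id combinations that the
+	 * kernel must reject.
+	 */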
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_LINK,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(cg, atype, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, cg, atype, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(cg, atype, 1);
+cleanup:
+ cgroup_mprog__destroy(skel);
+ close(cg);
+}
+
+void test_cgroup_mprog_opts(void)
+{
+ if (test__start_subtest("prog_attach_detach"))
+ test_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("link_attach_detach"))
+ test_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_prog_attach_detach"))
+ test_preorder_prog_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("preorder_link_attach_detach"))
+ test_preorder_link_attach_detach(BPF_CGROUP_GETSOCKOPT);
+ if (test__start_subtest("invalid_attach_detach"))
+ test_invalid_attach_detach(BPF_CGROUP_GETSOCKOPT);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
new file mode 100644
index 000000000000..a36d2e968bc5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_mprog_ordering.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int sock_fd, bool has_relative_fd)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_p_atype, prog_p2_atype;
+ int prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen = 1;
+ int err = 0;
+
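+	/* Attach parent, then parent_2 with BPF_F_BEFORE (optionally naming
+	 * parent as the relative fd); parent_2 must run first.
+	 */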
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ LIBBPF_OPTS_RESET(opts);
+ opts.flags = BPF_F_ALLOW_MULTI;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_BEFORE;
+ if (has_relative_fd)
+ opts.relative_fd = prog_p_fd;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ result = skel->bss->result;
+ ASSERT_TRUE(result[0] == 4 && result[1] == 3, "result values");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_mprog_ordering(void)
+{
+ int cg_parent = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
new file mode 100644
index 000000000000..d4d583872fa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_preorder.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "cgroup_preorder.skel.h"
+
+static int run_getsockopt_test(int cg_parent, int cg_child, int sock_fd, bool all_preorder)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opts);
+ enum bpf_attach_type prog_c_atype, prog_c2_atype, prog_p_atype, prog_p2_atype;
+ int prog_c_fd, prog_c2_fd, prog_p_fd, prog_p2_fd;
+ struct cgroup_preorder *skel = NULL;
+ struct bpf_program *prog;
+ __u8 *result, buf;
+ socklen_t optlen;
+ int err = 0;
+
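+	/* result[] records, in order, which of the attached programs ran. */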
+ skel = cgroup_preorder__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_preorder__open_and_load"))
+ return 0;
+
+ buf = 0x00;
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (!ASSERT_OK(err, "setsockopt"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.child;
+ prog_c_fd = bpf_program__fd(prog);
+ prog_c_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c_fd, cg_child, prog_c_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child"))
+ goto close_skel;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.child_2;
+ prog_c2_fd = bpf_program__fd(prog);
+ prog_c2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_c2_fd, cg_child, prog_c2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-child_2"))
+ goto detach_child;
+
+ optlen = 1;
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_child_2;
+
+ result = skel->bss->result;
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 1 && result[1] == 2, "child only");
+ else
+ ASSERT_TRUE(result[0] == 2 && result[1] == 1, "child only");
+
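+ /* reset the recorded ordering before re-running getsockopt with the parent progs attached as well */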
+ skel->bss->idx = 0;
+ memset(result, 0, 4);
+
+ opts.flags = BPF_F_ALLOW_MULTI;
+ if (all_preorder)
+ opts.flags |= BPF_F_PREORDER;
+ prog = skel->progs.parent;
+ prog_p_fd = bpf_program__fd(prog);
+ prog_p_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p_fd, cg_parent, prog_p_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent"))
+ goto detach_child_2;
+
+ opts.flags = BPF_F_ALLOW_MULTI | BPF_F_PREORDER;
+ prog = skel->progs.parent_2;
+ prog_p2_fd = bpf_program__fd(prog);
+ prog_p2_atype = bpf_program__expected_attach_type(prog);
+ err = bpf_prog_attach_opts(prog_p2_fd, cg_parent, prog_p2_atype, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_attach_opts-parent_2"))
+ goto detach_parent;
+
+ err = getsockopt(sock_fd, SOL_IP, IP_TOS, &buf, &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto detach_parent_2;
+
+ if (all_preorder)
+ ASSERT_TRUE(result[0] == 3 && result[1] == 4 && result[2] == 1 && result[3] == 2,
+ "parent and child");
+ else
+ ASSERT_TRUE(result[0] == 4 && result[1] == 2 && result[2] == 1 && result[3] == 3,
+ "parent and child");
+
+detach_parent_2:
+ ASSERT_OK(bpf_prog_detach2(prog_p2_fd, cg_parent, prog_p2_atype),
+ "bpf_prog_detach2-parent_2");
+detach_parent:
+ ASSERT_OK(bpf_prog_detach2(prog_p_fd, cg_parent, prog_p_atype),
+ "bpf_prog_detach2-parent");
+detach_child_2:
+ ASSERT_OK(bpf_prog_detach2(prog_c2_fd, cg_child, prog_c2_atype),
+ "bpf_prog_detach2-child_2");
+detach_child:
+ ASSERT_OK(bpf_prog_detach2(prog_c_fd, cg_child, prog_c_atype),
+ "bpf_prog_detach2-child");
+close_skel:
+ cgroup_preorder__destroy(skel);
+ return err;
+}
+
+void test_cgroup_preorder(void)
+{
+ int cg_parent = -1, cg_child = -1, sock_fd = -1;
+
+ cg_parent = test__join_cgroup("/parent");
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
+ goto out;
+
+ cg_child = test__join_cgroup("/parent/child");
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
+ goto out;
+
+ sock_fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, false), "getsockopt_test_1");
+ ASSERT_OK(run_getsockopt_test(cg_parent, cg_child, sock_fd, true), "getsockopt_test_2");
+
+out:
+ close(sock_fd);
+ close(cg_child);
+ close(cg_parent);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c
new file mode 100644
index 000000000000..e1a90c10db8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_direct_packet_access.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_skb_direct_packet_access.skel.h"
+
+void test_cgroup_skb_prog_run_direct_packet_access(void)
+{
+ int err;
+ struct cgroup_skb_direct_packet_access *skel;
+ char test_skb[64] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = test_skb,
+ .data_size_in = sizeof(test_skb),
+ );
+
+ skel = cgroup_skb_direct_packet_access__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_skb_direct_packet_access__open_and_load"))
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.direct_packet_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts err");
+ ASSERT_EQ(topts.retval, 1, "retval");
+
+ ASSERT_NEQ(skel->bss->data_end, 0, "data_end");
+
+ cgroup_skb_direct_packet_access__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index 64abba72ac10..37c1cc52ed98 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -10,12 +10,18 @@
static int run_test(int cgroup_fd, int server_fd, bool classid)
{
struct connect4_dropper *skel;
- int fd, err = 0;
+ int fd, err = 0, port;
skel = connect4_dropper__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
return -1;
+ port = get_socket_local_port(server_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+
+ skel->bss->port = ntohs(port);
+
skel->links.connect_v4_dropper =
bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
cgroup_fd);
@@ -48,10 +54,9 @@ void test_cgroup_v1v2(void)
{
struct network_helper_opts opts = {};
int server_fd, client_fd, cgroup_fd;
- static const int port = 60120;
/* Step 1: Check base connectivity works without any BPF. */
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd"))
return;
client_fd = connect_to_fd_opts(server_fd, &opts);
@@ -66,7 +71,7 @@ void test_cgroup_v1v2(void)
cgroup_fd = test__join_cgroup("/connect_dropper");
if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd")) {
close(cgroup_fd);
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
new file mode 100644
index 000000000000..5ad904e9d15d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_xattr.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#include "read_cgroupfs_xattr.skel.h"
+#include "cgroup_read_xattr.skel.h"
+
+#define CGROUP_FS_PARENT "foo/"
+#define CGROUP_FS_CHILD CGROUP_FS_PARENT "bar/"
+#define TMP_FILE "/tmp/selftests_cgroup_xattr"
+
+static const char xattr_value_a[] = "bpf_selftest_value_a";
+static const char xattr_value_b[] = "bpf_selftest_value_b";
+static const char xattr_name[] = "user.bpf_test";
+
+static void test_read_cgroup_xattr(void)
+{
+ int tmp_fd, parent_cgroup_fd = -1, child_cgroup_fd = -1;
+ struct read_cgroupfs_xattr *skel = NULL;
+
+ parent_cgroup_fd = test__join_cgroup(CGROUP_FS_PARENT);
+ if (!ASSERT_OK_FD(parent_cgroup_fd, "create parent cgroup"))
+ return;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_PARENT, xattr_name, xattr_value_a),
+ "set parent xattr"))
+ goto out;
+
+ child_cgroup_fd = test__join_cgroup(CGROUP_FS_CHILD);
+ if (!ASSERT_OK_FD(child_cgroup_fd, "create child cgroup"))
+ goto out;
+ if (!ASSERT_OK(set_cgroup_xattr(CGROUP_FS_CHILD, xattr_name, xattr_value_b),
+ "set child xattr"))
+ goto out;
+
+ skel = read_cgroupfs_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_cgroupfs_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->target_pid = sys_gettid();
+
+ if (!ASSERT_OK(read_cgroupfs_xattr__attach(skel), "read_cgroupfs_xattr__attach"))
+ goto out;
+
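+ /* opening any file from the target task should trigger the attached prog,
+ * which is expected to find both xattr values up the cgroup hierarchy.
+ */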
+ tmp_fd = open(TMP_FILE, O_RDONLY | O_CREAT, 0644);
+ ASSERT_OK_FD(tmp_fd, "open tmp file");
+ close(tmp_fd);
+
+ ASSERT_TRUE(skel->bss->found_value_a, "found_value_a");
+ ASSERT_TRUE(skel->bss->found_value_b, "found_value_b");
+
+out:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+ read_cgroupfs_xattr__destroy(skel);
+ unlink(TMP_FILE);
+}
+
+void test_cgroup_xattr(void)
+{
+ RUN_TESTS(cgroup_read_xattr);
+
+ if (test__start_subtest("read_cgroupfs_xattr"))
+ test_read_cgroup_xattr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index adda85f97058..4b42fbc96efc 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -4,6 +4,8 @@
#define _GNU_SOURCE
#include <cgroup_helpers.h>
#include <test_progs.h>
+#include <sched.h>
+#include <sys/wait.h>
#include "cgrp_kfunc_failure.skel.h"
#include "cgrp_kfunc_success.skel.h"
@@ -87,6 +89,72 @@ static const char * const success_tests[] = {
"test_cgrp_from_id",
};
+static void test_cgrp_from_id_ns(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ int pid, pipe_fd[2];
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = skel->progs.test_cgrp_from_id_ns;
+
+ if (!ASSERT_OK(pipe(pipe_fd), "pipe"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork result")) {
+ close(pipe_fd[0]);
+ close(pipe_fd[1]);
+ goto cleanup;
+ }
+
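+ /* child: join a fresh cgroup, unshare the cgroup namespace, then run the
+ * prog so the cgrp_from_id path is exercised from inside that namespace.
+ */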
+ if (pid == 0) {
+ int ret = 0;
+
+ close(pipe_fd[0]);
+
+ if (!ASSERT_GE(cgroup_setup_and_join("cgrp_from_id_ns"), 0, "join cgroup"))
+ exit(1);
+
+ if (!ASSERT_OK(unshare(CLONE_NEWCGROUP), "unshare cgns"))
+ exit(1);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(ret, "test run ret"))
+ exit(1);
+
+ if (!ASSERT_OK(opts.retval, "test run retval"))
+ exit(1);
+
+ if (!ASSERT_EQ(write(pipe_fd[1], &ret, sizeof(ret)), sizeof(ret), "write pipe"))
+ exit(1);
+
+ exit(0);
+ } else {
+ int res;
+
+ close(pipe_fd[1]);
+
+ ASSERT_EQ(read(pipe_fd[0], &res, sizeof(res)), sizeof(res), "read res");
+ ASSERT_EQ(waitpid(pid, NULL, 0), pid, "wait on child");
+
+ remove_cgroup_pid("cgrp_from_id_ns", pid);
+
+ ASSERT_OK(res, "result from run");
+ }
+
+ close(pipe_fd[0]);
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+}
+
void test_cgrp_kfunc(void)
{
int i, err;
@@ -102,6 +170,9 @@ void test_cgrp_kfunc(void)
run_success_test(success_tests[i]);
}
+ if (test__start_subtest("test_cgrp_from_id_ns"))
+ test_cgrp_from_id_ns();
+
RUN_TESTS(cgrp_kfunc_failure);
cleanup:
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 2a9a30650350..65b4512967e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -153,6 +153,26 @@ static void test_check_mtu_run_tc(struct test_check_mtu *skel,
ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
}
+static void test_chk_segs_flag(struct test_check_mtu *skel, __u32 mtu)
+{
+ int err, prog_fd = bpf_program__fd(skel->progs.tc_chk_segs_flag);
+ struct __sk_buff skb = {
+ .gso_size = 10,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ /* Lower the MTU to exercise the BPF_MTU_CHK_SEGS flag */
+ SYS_NOFAIL("ip link set dev lo mtu 10");
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ SYS_NOFAIL("ip link set dev lo mtu %u", mtu);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, BPF_OK, "retval");
+}
static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
{
@@ -177,11 +197,12 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+ test_chk_segs_flag(skel, mtu);
cleanup:
test_check_mtu__destroy(skel);
}
-void serial_test_check_mtu(void)
+void test_ns_check_mtu(void)
{
int mtu_lo;
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 34b59f6baca1..7488a7606e6a 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -22,79 +22,37 @@
static int duration = 0;
-struct addr_port {
- in_port_t port;
- union {
- struct in_addr in_addr;
- struct in6_addr in6_addr;
- };
-};
-
-struct tuple {
- int family;
- struct addr_port src;
- struct addr_port dst;
-};
-
-static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
-{
- const struct sockaddr_in6 *in6;
- const struct sockaddr_in *in;
-
- switch (sa->sa_family) {
- case AF_INET:
- in = (const struct sockaddr_in *)sa;
- ap->in_addr = in->sin_addr;
- ap->port = in->sin_port;
- return true;
-
- case AF_INET6:
- in6 = (const struct sockaddr_in6 *)sa;
- ap->in6_addr = in6->sin6_addr;
- ap->port = in6->sin6_port;
- return true;
-
- default:
- return false;
- }
-}
-static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
- int *server, int *conn, struct tuple *tuple)
+static bool set_up_conn(const struct sockaddr_storage *addr, socklen_t len, int type,
+ int *server, int *conn,
+ struct sockaddr_storage *src,
+ struct sockaddr_storage *dst)
{
struct sockaddr_storage ss;
socklen_t slen = sizeof(ss);
- struct sockaddr *sa = (struct sockaddr *)&ss;
- *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
+ *server = start_server_addr(type, addr, len, NULL);
if (*server < 0)
return false;
- if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+ if (CHECK_FAIL(getsockname(*server, (struct sockaddr *)&ss, &slen)))
goto close_server;
- *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
+ *conn = connect_to_addr(type, &ss, slen, NULL);
if (*conn < 0)
goto close_server;
/* We want to simulate packets arriving at conn, so we have to
* swap src and dst.
*/
- slen = sizeof(ss);
- if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
- goto close_conn;
-
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+ slen = sizeof(*dst);
+ if (CHECK_FAIL(getsockname(*conn, (struct sockaddr *)dst, &slen)))
goto close_conn;
- slen = sizeof(ss);
- if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+ slen = sizeof(*src);
+ if (CHECK_FAIL(getpeername(*conn, (struct sockaddr *)src, &slen)))
goto close_conn;
- if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
- goto close_conn;
-
- tuple->family = ss.ss_family;
return true;
close_conn:
@@ -110,17 +68,16 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
{
struct sockaddr_in *addr4;
struct sockaddr_in6 *addr6;
+ memset(addr, 0, sizeof(*addr));
switch (family) {
case AF_INET:
addr4 = (struct sockaddr_in *)addr;
- memset(addr4, 0, sizeof(*addr4));
addr4->sin_family = family;
addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
return sizeof(*addr4);
case AF_INET6:
addr6 = (struct sockaddr_in6 *)addr;
- memset(addr6, 0, sizeof(*addr6));
addr6->sin6_family = family;
addr6->sin6_addr = in6addr_loopback;
return sizeof(*addr6);
@@ -242,9 +199,15 @@ static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
}
static size_t build_input(const struct test_cfg *test, void *const buf,
- const struct tuple *tuple)
+ const struct sockaddr_storage *src,
+ const struct sockaddr_storage *dst)
{
- in_port_t sport = tuple->src.port;
+ struct sockaddr_in6 *src_in6 = (struct sockaddr_in6 *)src;
+ struct sockaddr_in6 *dst_in6 = (struct sockaddr_in6 *)dst;
+ struct sockaddr_in *src_in = (struct sockaddr_in *)src;
+ struct sockaddr_in *dst_in = (struct sockaddr_in *)dst;
+ sa_family_t family = src->ss_family;
+ in_port_t sport, dport;
encap_headers_t encap;
struct iphdr ip;
struct ipv6hdr ipv6;
@@ -254,8 +217,11 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
uint8_t *p = buf;
int proto;
+ sport = (family == AF_INET) ? src_in->sin_port : src_in6->sin6_port;
+ dport = (family == AF_INET) ? dst_in->sin_port : dst_in6->sin6_port;
+
proto = IPPROTO_IPIP;
- if (tuple->family == AF_INET6)
+ if (family == AF_INET6)
proto = IPPROTO_IPV6;
encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
@@ -270,15 +236,15 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
if (test->type == UDP)
proto = IPPROTO_UDP;
- switch (tuple->family) {
+ switch (family) {
case AF_INET:
ip = (struct iphdr){
.ihl = 5,
.version = 4,
.ttl = IPDEFTTL,
.protocol = proto,
- .saddr = tuple->src.in_addr.s_addr,
- .daddr = tuple->dst.in_addr.s_addr,
+ .saddr = src_in->sin_addr.s_addr,
+ .daddr = dst_in->sin_addr.s_addr,
};
p = mempcpy(p, &ip, sizeof(ip));
break;
@@ -287,8 +253,8 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
.version = 6,
.hop_limit = IPDEFTTL,
.nexthdr = proto,
- .saddr = tuple->src.in6_addr,
- .daddr = tuple->dst.in6_addr,
+ .saddr = src_in6->sin6_addr,
+ .daddr = dst_in6->sin6_addr,
};
p = mempcpy(p, &ipv6, sizeof(ipv6));
break;
@@ -303,18 +269,16 @@ static size_t build_input(const struct test_cfg *test, void *const buf,
case TCP:
tcp = (struct tcphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
+ .syn = (test->flags == SYN),
+ .ack = (test->flags == ACK),
};
- if (test->flags == SYN)
- tcp.syn = true;
- if (test->flags == ACK)
- tcp.ack = true;
p = mempcpy(p, &tcp, sizeof(tcp));
break;
case UDP:
udp = (struct udphdr){
.source = sport,
- .dest = tuple->dst.port,
+ .dest = dport,
};
p = mempcpy(p, &udp, sizeof(udp));
break;
@@ -339,27 +303,26 @@ static void test_cls_redirect_common(struct bpf_program *prog)
LIBBPF_OPTS(bpf_test_run_opts, tattr);
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
- struct sockaddr *addr;
socklen_t slen;
int i, j, err, prog_fd;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
- struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage srcs[__NR_KIND][ARRAY_SIZE(families)];
+ struct sockaddr_storage dsts[__NR_KIND][ARRAY_SIZE(families)];
- addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
if (CHECK_FAIL(!slen))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_DGRAM,
&servers[UDP][i], &conns[UDP][i],
- &tuples[UDP][i])))
+ &srcs[UDP][i], &dsts[UDP][i])))
goto cleanup;
- if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+ if (CHECK_FAIL(!set_up_conn(&ss, slen, SOCK_STREAM,
&servers[TCP][i], &conns[TCP][i],
- &tuples[TCP][i])))
+ &srcs[TCP][i], &dsts[TCP][i])))
goto cleanup;
}
@@ -368,11 +331,12 @@ static void test_cls_redirect_common(struct bpf_program *prog)
struct test_cfg *test = &tests[i];
for (j = 0; j < ARRAY_SIZE(families); j++) {
- struct tuple *tuple = &tuples[test->type][j];
+ struct sockaddr_storage *src = &srcs[test->type][j];
+ struct sockaddr_storage *dst = &dsts[test->type][j];
char input[256];
char tmp[256];
- test_str(tmp, sizeof(tmp), test, tuple->family);
+ test_str(tmp, sizeof(tmp), test, families[j]);
if (!test__start_subtest(tmp))
continue;
@@ -380,7 +344,7 @@ static void test_cls_redirect_common(struct bpf_program *prog)
tattr.data_size_out = sizeof(tmp);
tattr.data_in = input;
- tattr.data_size_in = build_input(test, input, tuple);
+ tattr.data_size_in = build_input(test, input, src, dst);
if (CHECK_FAIL(!tattr.data_size_in))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
new file mode 100644
index 000000000000..285f20241fe1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/compute_live_registers.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "compute_live_registers.skel.h"
+#include "test_progs.h"
+
+void test_compute_live_registers(void)
+{
+ RUN_TESTS(compute_live_registers);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 1c682550e0e7..08963c82f30b 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -2,7 +2,7 @@
#define _GNU_SOURCE
#include <test_progs.h>
#include "progs/core_reloc_types.h"
-#include "bpf_testmod/bpf_testmod.h"
+#include "test_kmods/bpf_testmod.h"
#include <linux/limits.h>
#include <sys/mman.h>
#include <sys/syscall.h>
@@ -85,11 +85,11 @@ static int duration = 0;
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
- .run_btfgen_fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
- .a = { [2] = 1 }, \
+ .a = { [2] = 1, [3] = 11 }, \
.b = { [1] = { [2] = { [3] = 2 } } }, \
.c = { [1] = { .c = 3 } }, \
.d = { [0] = { [0] = { .d = 4 } } }, \
@@ -108,6 +108,7 @@ static int duration = 0;
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_arrays_output) { \
.a2 = 1, \
+ .a3 = 12, \
.b123 = 2, \
.c1c = 3, \
.d00d = 4, \
@@ -602,6 +603,7 @@ static const struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_non_array),
ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
+ ARRAYS_ERR_CASE(arrays___err_bad_signed_arr_elem_sz),
/* enum/ptr/int handling scenarios */
PRIMITIVES_CASE(primitives),
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index e58a04654238..6c45330a5ca3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -25,6 +25,10 @@ static const char * const cpumask_success_testcases[] = {
"test_global_mask_nested_deep_rcu",
"test_global_mask_nested_deep_array_rcu",
"test_cpumask_weight",
+ "test_refcount_null_tracking",
+ "test_populate_reject_small_mask",
+ "test_populate_reject_unaligned",
+ "test_populate",
};
static void verify_success(const char *prog_name)
@@ -78,6 +82,5 @@ void test_cpumask(void)
verify_success(cpumask_success_testcases[i]);
}
- RUN_TESTS(cpumask_success);
RUN_TESTS(cpumask_failure);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
new file mode 100644
index 000000000000..6c2b0c3dbcd8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dmabuf_iter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "dmabuf_iter.skel.h"
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
+#include <linux/udmabuf.h>
+
+static int udmabuf = -1;
+static const char udmabuf_test_buffer_name[DMA_BUF_NAME_LEN] = "udmabuf_test_buffer_for_iter";
+static size_t udmabuf_test_buffer_size;
+static int sysheap_dmabuf = -1;
+static const char sysheap_test_buffer_name[DMA_BUF_NAME_LEN] = "sysheap_test_buffer_for_iter";
+static size_t sysheap_test_buffer_size;
+
+static int create_udmabuf(void)
+{
+ struct udmabuf_create create;
+ int dev_udmabuf, memfd, local_udmabuf;
+
+ udmabuf_test_buffer_size = 10 * getpagesize();
+
+ if (!ASSERT_LE(sizeof(udmabuf_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ memfd = memfd_create("memfd_test", MFD_ALLOW_SEALING);
+ if (!ASSERT_OK_FD(memfd, "memfd_create"))
+ return -1;
+
+ if (!ASSERT_OK(ftruncate(memfd, udmabuf_test_buffer_size), "ftruncate"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK), "seal"))
+ goto close_memfd;
+
+ dev_udmabuf = open("/dev/udmabuf", O_RDONLY);
+ if (!ASSERT_OK_FD(dev_udmabuf, "open udmabuf"))
+ goto close_memfd;
+
+ memset(&create, 0, sizeof(create));
+ create.memfd = memfd;
+ create.flags = UDMABUF_FLAGS_CLOEXEC;
+ create.offset = 0;
+ create.size = udmabuf_test_buffer_size;
+
+ local_udmabuf = ioctl(dev_udmabuf, UDMABUF_CREATE, &create);
+ close(dev_udmabuf);
+ if (!ASSERT_OK_FD(local_udmabuf, "udmabuf_create"))
+ goto close_memfd;
+
+ if (!ASSERT_OK(ioctl(local_udmabuf, DMA_BUF_SET_NAME_B, udmabuf_test_buffer_name), "name"))
+ goto close_udmabuf;
+
+ return local_udmabuf;
+
+close_udmabuf:
+ close(local_udmabuf);
+close_memfd:
+ close(memfd);
+ return -1;
+}
+
+static int create_sys_heap_dmabuf(void)
+{
+ sysheap_test_buffer_size = 20 * getpagesize();
+
+ struct dma_heap_allocation_data data = {
+ .len = sysheap_test_buffer_size,
+ .fd = 0,
+ .fd_flags = O_RDWR | O_CLOEXEC,
+ .heap_flags = 0,
+ };
+ int heap_fd, ret;
+
+ if (!ASSERT_LE(sizeof(sysheap_test_buffer_name), DMA_BUF_NAME_LEN, "NAMETOOLONG"))
+ return -1;
+
+ heap_fd = open("/dev/dma_heap/system", O_RDONLY);
+ if (!ASSERT_OK_FD(heap_fd, "open dma heap"))
+ return -1;
+
+ ret = ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &data);
+ close(heap_fd);
+ if (!ASSERT_OK(ret, "syheap alloc"))
+ return -1;
+
+ if (!ASSERT_OK(ioctl(data.fd, DMA_BUF_SET_NAME_B, sysheap_test_buffer_name), "name"))
+ goto close_sysheap_dmabuf;
+
+ return data.fd;
+
+close_sysheap_dmabuf:
+ close(data.fd);
+ return -1;
+}
+
+static int create_test_buffers(void)
+{
+ udmabuf = create_udmabuf();
+ sysheap_dmabuf = create_sys_heap_dmabuf();
+
+ if (udmabuf < 0 || sysheap_dmabuf < 0)
+ return -1;
+
+ return 0;
+}
+
+static void destroy_test_buffers(void)
+{
+ close(udmabuf);
+ udmabuf = -1;
+
+ close(sysheap_dmabuf);
+ sysheap_dmabuf = -1;
+}
+
+enum Fields { INODE, SIZE, NAME, EXPORTER, FIELD_COUNT };
+struct DmabufInfo {
+ unsigned long inode;
+ unsigned long size;
+ char name[DMA_BUF_NAME_LEN];
+ char exporter[32];
+};
+
+static bool check_dmabuf_info(const struct DmabufInfo *bufinfo,
+ unsigned long size,
+ const char *name, const char *exporter)
+{
+ return size == bufinfo->size &&
+ !strcmp(name, bufinfo->name) &&
+ !strcmp(exporter, bufinfo->exporter);
+}
+
+static void subtest_dmabuf_iter_check_no_infinite_reads(struct dmabuf_iter *skel)
+{
+ int iter_fd;
+ char buf[256];
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ while (read(iter_fd, buf, sizeof(buf)) > 0)
+ ; /* Read out all contents */
+
+ /* Next reads should return 0 */
+ ASSERT_EQ(read(iter_fd, buf, sizeof(buf)), 0, "read");
+
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_default_iter(struct dmabuf_iter *skel)
+{
+ bool found_test_sysheap_dmabuf = false;
+ bool found_test_udmabuf = false;
+ struct DmabufInfo bufinfo;
+ size_t linesize = 0;
+ char *line = NULL;
+ FILE *iter_file;
+ int iter_fd, f = INODE;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.dmabuf_collector));
+ if (!ASSERT_OK_FD(iter_fd, "iter_create"))
+ return;
+
+ iter_file = fdopen(iter_fd, "r");
+ if (!ASSERT_OK_PTR(iter_file, "fdopen"))
+ goto close_iter_fd;
+
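+ /* the iterator is expected to print one field per line, repeating inode, size, name, exporter for each dmabuf */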
+ while (getline(&line, &linesize, iter_file) != -1) {
+ if (f % FIELD_COUNT == INODE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.inode), 1,
+ "read inode");
+ } else if (f % FIELD_COUNT == SIZE) {
+ ASSERT_EQ(sscanf(line, "%ld", &bufinfo.size), 1,
+ "read size");
+ } else if (f % FIELD_COUNT == NAME) {
+ ASSERT_EQ(sscanf(line, "%s", bufinfo.name), 1,
+ "read name");
+ } else if (f % FIELD_COUNT == EXPORTER) {
+ ASSERT_EQ(sscanf(line, "%31s", bufinfo.exporter), 1,
+ "read exporter");
+
+ if (check_dmabuf_info(&bufinfo,
+ sysheap_test_buffer_size,
+ sysheap_test_buffer_name,
+ "system"))
+ found_test_sysheap_dmabuf = true;
+ else if (check_dmabuf_info(&bufinfo,
+ udmabuf_test_buffer_size,
+ udmabuf_test_buffer_name,
+ "udmabuf"))
+ found_test_udmabuf = true;
+ }
+ ++f;
+ }
+
+ ASSERT_EQ(f % FIELD_COUNT, INODE, "number of fields");
+
+ ASSERT_TRUE(found_test_sysheap_dmabuf, "found_test_sysheap_dmabuf");
+ ASSERT_TRUE(found_test_udmabuf, "found_test_udmabuf");
+
+ free(line);
+ fclose(iter_file);
+close_iter_fd:
+ close(iter_fd);
+}
+
+static void subtest_dmabuf_iter_check_open_coded(struct dmabuf_iter *skel, int map_fd)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ char key[DMA_BUF_NAME_LEN];
+ int err, fd;
+ bool found;
+
+ /* No need to attach it, just run it directly */
+ fd = bpf_program__fd(skel->progs.iter_dmabuf_for_each);
+
+ err = bpf_prog_test_run_opts(fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, key), "get next key"))
+ return;
+
+ do {
+ ASSERT_OK(bpf_map_lookup_elem(map_fd, key, &found), "lookup");
+ ASSERT_TRUE(found, "found test buffer");
+ } while (bpf_map_get_next_key(map_fd, key, key));
+}
+
+void test_dmabuf_iter(void)
+{
+ struct dmabuf_iter *skel = NULL;
+ int map_fd;
+ const bool f = false;
+
+ skel = dmabuf_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dmabuf_iter__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.testbuf_hash);
+ if (!ASSERT_OK_FD(map_fd, "map_fd"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, udmabuf_test_buffer_name, &f, BPF_ANY),
+ "insert udmabuf"))
+ goto destroy_skel;
+ if (!ASSERT_OK(bpf_map_update_elem(map_fd, sysheap_test_buffer_name, &f, BPF_ANY),
+ "insert sysheap buffer"))
+ goto destroy_skel;
+
+ if (!ASSERT_OK(create_test_buffers(), "create_test_buffers"))
+ goto destroy;
+
+ if (!ASSERT_OK(dmabuf_iter__attach(skel), "skel_attach"))
+ goto destroy;
+
+ if (test__start_subtest("no_infinite_reads"))
+ subtest_dmabuf_iter_check_no_infinite_reads(skel);
+ if (test__start_subtest("default_iter"))
+ subtest_dmabuf_iter_check_default_iter(skel);
+ if (test__start_subtest("open_coded"))
+ subtest_dmabuf_iter_check_open_coded(skel, map_fd);
+
+destroy:
+ destroy_test_buffers();
+destroy_skel:
+ dmabuf_iter__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index b614a5272dfd..b9f86cb91e81 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -10,6 +10,7 @@ enum test_setup_type {
SETUP_SYSCALL_SLEEP,
SETUP_SKB_PROG,
SETUP_SKB_PROG_TP,
+ SETUP_XDP_PROG,
};
static struct {
@@ -18,9 +19,21 @@ static struct {
} success_tests[] = {
{"test_read_write", SETUP_SYSCALL_SLEEP},
{"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_copy_xdp", SETUP_XDP_PROG},
+ {"test_dynptr_memset_zero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_notzero", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_zero_adjusted", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_overflow_offset", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_memset_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_memset_xdp_chunks", SETUP_XDP_PROG},
{"test_ringbuf", SETUP_SYSCALL_SLEEP},
{"test_skb_readonly", SETUP_SKB_PROG},
{"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_data", SETUP_SKB_PROG},
+ {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG},
{"test_adjust", SETUP_SYSCALL_SLEEP},
{"test_adjust_err", SETUP_SYSCALL_SLEEP},
{"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
@@ -30,10 +43,21 @@ static struct {
{"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
{"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
{"test_dynptr_skb_tp_btf", SETUP_SKB_PROG_TP},
+ {"test_probe_read_user_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_user_str_dynptr", SETUP_XDP_PROG},
+ {"test_probe_read_kernel_str_dynptr", SETUP_XDP_PROG},
+ {"test_copy_from_user_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_str_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_copy_from_user_task_str_dynptr", SETUP_SYSCALL_SLEEP},
};
+#define PAGE_SIZE_64K 65536
+
static void verify_success(const char *prog_name, enum test_setup_type setup_type)
{
+ char user_data[384] = {[0 ... 382] = 'a', '\0'};
struct dynptr_success *skel;
struct bpf_program *prog;
struct bpf_link *link;
@@ -55,6 +79,10 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
if (!ASSERT_OK(err, "dynptr_success__load"))
goto cleanup;
+ skel->bss->user_ptr = user_data;
+ skel->data->test_len[0] = sizeof(user_data);
+ memcpy(skel->bss->expected_str, user_data, sizeof(user_data));
+
switch (setup_type) {
case SETUP_SYSCALL_SLEEP:
link = bpf_program__attach(prog);
@@ -120,6 +148,28 @@ static void verify_success(const char *prog_name, enum test_setup_type setup_typ
break;
}
+ case SETUP_XDP_PROG:
+ {
+ char data[90000];
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .repeat = 1,
+ );
+
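+			/* presumably sized so the frame spans multiple pages (and XDP frags) on both 4K and 64K page kernels */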
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = sizeof(data);
+ else
+ opts.data_size_in = 5000;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
}
ASSERT_EQ(skel->bss->err, 0, "err");
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_array.c b/tools/testing/selftests/bpf/prog_tests/fd_array.c
new file mode 100644
index 000000000000..c534b4d5f9da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_array.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include <linux/btf.h>
+#include <bpf/bpf.h>
+
+#include "../test_btf.h"
+
+static inline int new_map(void)
+{
+ const char *name = NULL;
+ __u32 max_entries = 1;
+ __u32 value_size = 8;
+ __u32 key_size = 4;
+
+ return bpf_map_create(BPF_MAP_TYPE_ARRAY, name,
+ key_size, value_size,
+ max_entries, NULL);
+}
+
+static int new_btf(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(raw_btf.types),
+ .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types),
+ .str_len = sizeof(raw_btf.str),
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+
+ return bpf_btf_load(&raw_btf, sizeof(raw_btf), NULL);
+}
+
+#define Close(FD) do { \
+ if ((FD) >= 0) { \
+ close(FD); \
+ FD = -1; \
+ } \
+} while (0)
+
+static bool map_exists(__u32 id)
+{
+ int fd;
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd >= 0) {
+ close(fd);
+ return true;
+ }
+ return false;
+}
+
+static bool btf_exists(__u32 id)
+{
+ int fd;
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd >= 0) {
+ close(fd);
+ return true;
+ }
+ return false;
+}
+
+static inline int bpf_prog_get_map_ids(int prog_fd, __u32 *nr_map_ids, __u32 *map_ids)
+{
+ __u32 len = sizeof(struct bpf_prog_info);
+ struct bpf_prog_info info;
+ int err;
+
+ memset(&info, 0, len);
+ info.nr_map_ids = *nr_map_ids;
+ info.map_ids = ptr_to_u64(map_ids);
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ return -1;
+
+ *nr_map_ids = info.nr_map_ids;
+
+ return 0;
+}
+
+static int __load_test_prog(int map_fd, const int *fd_array, int fd_array_cnt)
+{
+ /* A trivial program which uses one map */
+ struct bpf_insn insns[] = {
+ BPF_LD_MAP_FD(BPF_REG_1, map_fd),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.fd_array = fd_array;
+ opts.fd_array_cnt = fd_array_cnt;
+
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, ARRAY_SIZE(insns), &opts);
+}
+
+static int load_test_prog(const int *fd_array, int fd_array_cnt)
+{
+ int map_fd;
+ int ret;
+
+ map_fd = new_map();
+ if (!ASSERT_GE(map_fd, 0, "new_map"))
+ return map_fd;
+
+ ret = __load_test_prog(map_fd, fd_array, fd_array_cnt);
+ close(map_fd);
+ return ret;
+}
+
+static bool check_expected_map_ids(int prog_fd, int expected, __u32 *map_ids, __u32 *nr_map_ids)
+{
+ int err;
+
+ err = bpf_prog_get_map_ids(prog_fd, nr_map_ids, map_ids);
+ if (!ASSERT_OK(err, "bpf_prog_get_map_ids"))
+ return false;
+ if (!ASSERT_EQ(*nr_map_ids, expected, "unexpected nr_map_ids"))
+ return false;
+
+ return true;
+}
+
+/*
+ * Load a program which uses one map. No fd_array maps are present.
+ * On return only one map is expected to be bound to prog.
+ */
+static void check_fd_array_cnt__no_fd_array(void)
+{
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ prog_fd = load_test_prog(NULL, 0);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ return;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids);
+ close(prog_fd);
+}
+
+/*
+ * Load a program which uses one map, and pass two extra, non-equal maps in
+ * fd_array with fd_array_cnt=2. On return three maps are expected to be bound
+ * to the program.
+ */
+static void check_fd_array_cnt__fd_array_ok(void)
+{
+ int extra_fds[2] = { -1, -1 };
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ extra_fds[0] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ extra_fds[1] = new_map();
+ if (!ASSERT_GE(extra_fds[1], 0, "new_map"))
+ goto cleanup;
+ prog_fd = load_test_prog(extra_fds, 2);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids))
+ goto cleanup;
+
+ /* maps should still exist when original file descriptors are closed */
+ Close(extra_fds[0]);
+ Close(extra_fds[1]);
+ if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map_ids[0] should exist"))
+ goto cleanup;
+ if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map_ids[1] should exist"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[1]);
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+/*
+ * Load a program with a few extra maps duplicated in the fd_array.
+ * After the load, maps should only be referenced once.
+ */
+static void check_fd_array_cnt__duplicated_maps(void)
+{
+ int extra_fds[4] = { -1, -1, -1, -1 };
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ extra_fds[0] = extra_fds[2] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ extra_fds[1] = extra_fds[3] = new_map();
+ if (!ASSERT_GE(extra_fds[1], 0, "new_map"))
+ goto cleanup;
+ prog_fd = load_test_prog(extra_fds, 4);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ if (!check_expected_map_ids(prog_fd, 3, map_ids, &nr_map_ids))
+ goto cleanup;
+
+ /* maps should still exist when original file descriptors are closed */
+ Close(extra_fds[0]);
+ Close(extra_fds[1]);
+ if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist"))
+ goto cleanup;
+ if (!ASSERT_EQ(map_exists(map_ids[1]), true, "map should exist"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[1]);
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+/*
+ * Check that if maps which are referenced by a program are
+ * passed in fd_array, then they will be referenced only once
+ */
+static void check_fd_array_cnt__referenced_maps_in_fd_array(void)
+{
+ int extra_fds[1] = { -1 };
+ __u32 map_ids[16];
+ __u32 nr_map_ids;
+ int prog_fd = -1;
+
+ extra_fds[0] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ prog_fd = __load_test_prog(extra_fds[0], extra_fds, 1);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+ nr_map_ids = ARRAY_SIZE(map_ids);
+ if (!check_expected_map_ids(prog_fd, 1, map_ids, &nr_map_ids))
+ goto cleanup;
+
+ /* map should still exist when original file descriptor is closed */
+ Close(extra_fds[0]);
+ if (!ASSERT_EQ(map_exists(map_ids[0]), true, "map should exist"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+static int get_btf_id_by_fd(int btf_fd, __u32 *id)
+{
+ struct bpf_btf_info info;
+ __u32 info_len = sizeof(info);
+ int err;
+
+ memset(&info, 0, info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
+ if (err)
+ return err;
+ if (id)
+ *id = info.id;
+ return 0;
+}
+
+/*
+ * Check that fd_array operates properly for btfs. Namely, to check that
+ * passing a btf fd in fd_array increases its reference count, do the
+ * following:
+ * 1) Create a new btf, it's referenced only by a file descriptor, so refcnt=1
+ * 2) Load a BPF prog with fd_array[0] = btf_fd; now btf's refcnt=2
+ * 3) Close the btf_fd, now refcnt=1
+ * Wait and check that BTF still exists.
+ */
+static void check_fd_array_cnt__referenced_btfs(void)
+{
+ int extra_fds[1] = { -1 };
+ int prog_fd = -1;
+ __u32 btf_id;
+ int tries;
+ int err;
+
+ extra_fds[0] = new_btf();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_btf"))
+ goto cleanup;
+ prog_fd = load_test_prog(extra_fds, 1);
+ if (!ASSERT_GE(prog_fd, 0, "BPF_PROG_LOAD"))
+ goto cleanup;
+
+ /* btf should still exist when original file descriptor is closed */
+ err = get_btf_id_by_fd(extra_fds[0], &btf_id);
+ if (!ASSERT_EQ(err, 0, "get_btf_id_by_fd"))
+ goto cleanup;
+
+ Close(extra_fds[0]);
+
+ if (!ASSERT_GE(kern_sync_rcu(), 0, "kern_sync_rcu 1"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(btf_exists(btf_id), true, "btf should exist"))
+ goto cleanup;
+
+ Close(prog_fd);
+
+ /* The program is freed by a workqueue, so there is no reliable
+ * way to sync; just wait a bit (max ~1 second). */
+ for (tries = 100; tries >= 0; tries--) {
+ usleep(1000);
+
+ if (!btf_exists(btf_id))
+ break;
+
+ if (tries)
+ continue;
+
+ PRINT_FAIL("btf should have been freed");
+ }
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[0]);
+ Close(prog_fd);
+}
+
+/*
+ * Test that a program with trash in fd_array can't be loaded:
+ * only map and BTF file descriptors should be accepted.
+ */
+static void check_fd_array_cnt__fd_array_with_trash(void)
+{
+ int extra_fds[3] = { -1, -1, -1 };
+ int prog_fd = -1;
+
+ extra_fds[0] = new_map();
+ if (!ASSERT_GE(extra_fds[0], 0, "new_map"))
+ goto cleanup;
+ extra_fds[1] = new_btf();
+ if (!ASSERT_GE(extra_fds[1], 0, "new_btf"))
+ goto cleanup;
+
+ /* trash 1: not a file descriptor */
+ extra_fds[2] = 0xbeef;
+ prog_fd = load_test_prog(extra_fds, 3);
+ if (!ASSERT_EQ(prog_fd, -EBADF, "prog should have been rejected with -EBADF"))
+ goto cleanup;
+
+ /* trash 2: not a map or btf */
+ extra_fds[2] = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(extra_fds[2], 0, "socket"))
+ goto cleanup;
+
+ prog_fd = load_test_prog(extra_fds, 3);
+ if (!ASSERT_EQ(prog_fd, -EINVAL, "prog should have been rejected with -EINVAL"))
+ goto cleanup;
+
+ /* Validate that the prog is ok if trash is removed */
+ Close(extra_fds[2]);
+ extra_fds[2] = new_btf();
+ if (!ASSERT_GE(extra_fds[2], 0, "new_btf"))
+ goto cleanup;
+
+ prog_fd = load_test_prog(extra_fds, 3);
+ if (!ASSERT_GE(prog_fd, 0, "prog should have been loaded"))
+ goto cleanup;
+
+ /* some fds might be invalid, so ignore return codes */
+cleanup:
+ Close(extra_fds[2]);
+ Close(extra_fds[1]);
+ Close(extra_fds[0]);
+}
+
+/*
+ * Test that a program with too big fd_array can't be loaded.
+ */
+static void check_fd_array_cnt__fd_array_too_big(void)
+{
+ int extra_fds[65];
+ int prog_fd = -1;
+ int i;
+
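+ /* 65 entries is presumably one past the fd_array_cnt limit, so the load is expected to fail with -E2BIG */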
+ for (i = 0; i < 65; i++) {
+ extra_fds[i] = new_map();
+ if (!ASSERT_GE(extra_fds[i], 0, "new_map"))
+ goto cleanup_fds;
+ }
+
+ prog_fd = load_test_prog(extra_fds, 65);
+ ASSERT_EQ(prog_fd, -E2BIG, "prog should have been rejected with -E2BIG");
+
+cleanup_fds:
+ while (i > 0)
+ Close(extra_fds[--i]);
+}
+
+void test_fd_array_cnt(void)
+{
+ if (test__start_subtest("no-fd-array"))
+ check_fd_array_cnt__no_fd_array();
+
+ if (test__start_subtest("fd-array-ok"))
+ check_fd_array_cnt__fd_array_ok();
+
+ if (test__start_subtest("fd-array-dup-input"))
+ check_fd_array_cnt__duplicated_maps();
+
+ if (test__start_subtest("fd-array-ref-maps-in-array"))
+ check_fd_array_cnt__referenced_maps_in_fd_array();
+
+ if (test__start_subtest("fd-array-ref-btfs"))
+ check_fd_array_cnt__referenced_btfs();
+
+ if (test__start_subtest("fd-array-trash-input"))
+ check_fd_array_cnt__fd_array_with_trash();
+
+ if (test__start_subtest("fd-array-2big"))
+ check_fd_array_cnt__fd_array_too_big();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
new file mode 100644
index 000000000000..ca46fdd6e1ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fd_htab_lookup.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <test_progs.h>
+#include "fd_htab_lookup.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ unsigned int entries;
+ bool stop;
+};
+
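+/* encode which step failed in the upper bits and the positive errno in the low 12 bits, returned as a pthread exit value */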
+#define ERR_TO_RETVAL(where, err) ((void *)(long)(((where) << 12) | (-err)))
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0, value;
+ int inner_fd, err;
+
+ err = bpf_map_lookup_elem(ctx->fd, &key, &value);
+ if (err) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, err);
+ }
+
+ inner_fd = bpf_map_get_fd_by_id(value);
+ if (inner_fd < 0) {
+ /* The old map has been freed */
+ if (inner_fd == -ENOENT)
+ continue;
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, inner_fd);
+ }
+
+ err = bpf_map_lookup_elem(inner_fd, &zero, &value);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+
+ if (value != key) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(4, -EINVAL);
+ }
+ }
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int j;
+
+ for (j = 0; j < ctx->entries; j++) {
+ unsigned int key = j, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (inner_fd < 0) {
+ ctx->stop = true;
+ return ERR_TO_RETVAL(1, inner_fd);
+ }
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(2, err);
+ }
+
+ err = bpf_map_update_elem(ctx->fd, &key, &inner_fd, BPF_EXIST);
+ if (err) {
+ close(inner_fd);
+ ctx->stop = true;
+ return ERR_TO_RETVAL(3, err);
+ }
+ close(inner_fd);
+ }
+ }
+
+ return NULL;
+}
+
+static int setup_htab(int fd, unsigned int entries)
+{
+ unsigned int i;
+
+ for (i = 0; i < entries; i++) {
+ unsigned int key = i, zero = 0;
+ int inner_fd, err;
+
+ inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_OK_FD(inner_fd, "new array"))
+ return -1;
+
+ err = bpf_map_update_elem(inner_fd, &zero, &key, 0);
+ if (!ASSERT_OK(err, "init array")) {
+ close(inner_fd);
+ return -1;
+ }
+
+ err = bpf_map_update_elem(fd, &key, &inner_fd, 0);
+ if (!ASSERT_OK(err, "init outer")) {
+ close(inner_fd);
+ return -1;
+ }
+ close(inner_fd);
+ }
+
+ return 0;
+}
+
+static int get_int_from_env(const char *name, int dft)
+{
+ const char *value;
+
+ value = getenv(name);
+ if (!value)
+ return dft;
+
+ return atoi(value);
+}
+
+void test_fd_htab_lookup(void)
+{
+ unsigned int i, wr_nr = 8, rd_nr = 16;
+ pthread_t tids[wr_nr + rd_nr];
+ struct fd_htab_lookup *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = fd_htab_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fd_htab_lookup__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.outer_map);
+ ctx.loop = get_int_from_env("FD_HTAB_LOOP_NR", 5);
+ ctx.stop = false;
+ ctx.entries = 8;
+
+ err = setup_htab(ctx.fd, ctx.entries);
+ if (err)
+ goto destroy;
+
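+ /* race writers that keep replacing the inner maps against readers that look the inner maps up by id */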
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ void *ret = NULL;
+ char desc[32];
+
+ if (!tids[i])
+ continue;
+
+ snprintf(desc, sizeof(desc), "thread %u", i + 1);
+ err = pthread_join(tids[i], &ret);
+ ASSERT_OK(err, desc);
+ ASSERT_EQ(ret, NULL, desc);
+ }
+destroy:
+ fd_htab_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 130f5b82d2e6..5ef1804e44df 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -12,13 +12,24 @@ void test_fentry_fexit(void)
int err, prog_fd, i;
LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test_lskel__open_and_load();
+ fentry_skel = fentry_test_lskel__open();
if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test_lskel__open_and_load();
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
+ goto close_prog;
+
+ fexit_skel = fexit_test_lskel__open();
if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
+ goto close_prog;
+
err = fentry_test_lskel__attach(fentry_skel);
if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index aee1bc77a17f..ec882328eb59 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -43,8 +43,13 @@ static void fentry_test(void)
struct fentry_test_lskel *fentry_skel = NULL;
int err;
- fentry_skel = fentry_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ fentry_skel = fentry_test_lskel__open();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_open"))
+ goto cleanup;
+
+ fentry_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fentry_test_lskel__load(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_skel_load"))
goto cleanup;
err = fentry_test_common(fentry_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 1c13007e37dd..94eed753560c 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -43,8 +43,13 @@ static void fexit_test(void)
struct fexit_test_lskel *fexit_skel = NULL;
int err;
- fexit_skel = fexit_test_lskel__open_and_load();
- if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ fexit_skel = fexit_test_lskel__open();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_open"))
+ goto cleanup;
+
+ fexit_skel->keyring_id = KEY_SPEC_SESSION_KEYRING;
+ err = fexit_test_lskel__load(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_skel_load"))
goto cleanup;
err = fexit_test_common(fexit_skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/file_reader.c b/tools/testing/selftests/bpf/prog_tests/file_reader.c
new file mode 100644
index 000000000000..5cde32b35da4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/file_reader.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "file_reader.skel.h"
+#include "file_reader_fail.skel.h"
+#include <dlfcn.h>
+#include <sys/mman.h>
+
+const char *user_ptr = "hello world";
+char file_contents[256000];
+
+void *get_executable_base_addr(void)
+{
+ Dl_info info;
+
+ if (!dladdr((void *)&get_executable_base_addr, &info)) {
+ fprintf(stderr, "dladdr failed\n");
+ return NULL;
+ }
+
+ return info.dli_fbase;
+}
+
+static int initialize_file_contents(void)
+{
+ int fd, page_sz = sysconf(_SC_PAGESIZE);
+ ssize_t n = 0, cur, off;
+ void *addr;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "Open /proc/self/exe\n"))
+ return 1;
+
+ do {
+ cur = read(fd, file_contents + n, sizeof(file_contents) - n);
+ if (!ASSERT_GT(cur, 0, "read success"))
+ break;
+ n += cur;
+ } while (n < sizeof(file_contents));
+
+ close(fd);
+
+ if (!ASSERT_EQ(n, sizeof(file_contents), "Read /proc/self/exe\n"))
+ return 1;
+
+ addr = get_executable_base_addr();
+ if (!ASSERT_NEQ(addr, NULL, "get executable address"))
+ return 1;
+
+ /* page-align base file address */
+ addr = (void *)((unsigned long)addr & ~(page_sz - 1));
+
+ /*
+ * Page out range 0..512K, use 0..256K for positive tests and
+ * 256K..512K for negative tests expecting page faults
+ */
+ for (off = 0; off < sizeof(file_contents) * 2; off += page_sz) {
+ if (!ASSERT_OK(madvise(addr + off, page_sz, MADV_PAGEOUT),
+ "madvise pageout"))
+ return errno;
+ }
+
+ return 0;
+}
+
+static void run_test(const char *prog_name)
+{
+ struct file_reader *skel;
+ struct bpf_program *prog;
+ int err, fd;
+
+ err = initialize_file_contents();
+ if (!ASSERT_OK(err, "initialize file contents"))
+ return;
+
+ skel = file_reader__open();
+ if (!ASSERT_OK_PTR(skel, "file_reader__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, strcmp(bpf_program__name(prog), prog_name) == 0);
+ }
+
+ memcpy(skel->bss->user_buf, file_contents, sizeof(file_contents));
+ skel->bss->pid = getpid();
+
+ err = file_reader__load(skel);
+ if (!ASSERT_OK(err, "file_reader__load"))
+ goto cleanup;
+
+ err = file_reader__attach(skel);
+ if (!ASSERT_OK(err, "file_reader__attach"))
+ goto cleanup;
+
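+ /* opening /proc/self/exe triggers the attached on_open prog, which presumably
+ * reads the paged-out file contents and compares them against user_buf.
+ */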
+ fd = open("/proc/self/exe", O_RDONLY);
+ if (fd >= 0)
+ close(fd);
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+ ASSERT_EQ(skel->bss->run_success, 1, "run_success");
+cleanup:
+ file_reader__destroy(skel);
+}
+
+void test_file_reader(void)
+{
+ if (test__start_subtest("on_open_expect_fault"))
+ run_test("on_open_expect_fault");
+
+ if (test__start_subtest("on_open_validate_file_read"))
+ run_test("on_open_validate_file_read");
+
+ if (test__start_subtest("negative"))
+ RUN_TESTS(file_reader_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
index d50cbd8040d4..e40114620751 100644
--- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -37,6 +37,7 @@ static noinline void uprobe_func(void)
static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
ssize_t offset, ssize_t entry_offset)
{
+ ssize_t ref_ctr_offset = entry_offset /* ref_ctr_offset for uprobes */;
struct bpf_link_info info;
__u32 len = sizeof(info);
char buf[PATH_MAX];
@@ -97,6 +98,7 @@ again:
case BPF_PERF_EVENT_UPROBE:
case BPF_PERF_EVENT_URETPROBE:
ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+ ASSERT_EQ(info.perf_event.uprobe.ref_ctr_offset, ref_ctr_offset, "uprobe_ref_ctr_offset");
ASSERT_EQ(info.perf_event.uprobe.name_len, strlen(UPROBE_FILE) + 1,
"name_len");
@@ -171,6 +173,10 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
/* See also arch_adjust_kprobe_addr(). */
if (skel->kconfig->CONFIG_X86_KERNEL_IBT)
entry_offset = 4;
+ if (skel->kconfig->CONFIG_PPC64 &&
+ skel->kconfig->CONFIG_KPROBES_ON_FTRACE &&
+ !skel->kconfig->CONFIG_PPC_FTRACE_OUT_OF_LINE)
+ entry_offset = 4;
err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset);
ASSERT_OK(err, "verify_perf_link_info");
} else {
@@ -237,20 +243,32 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
.retprobe = type == BPF_PERF_EVENT_URETPROBE,
.bpf_cookie = PERF_EVENT_COOKIE,
);
+ const char *sema[1] = {
+ "uprobe_link_info_sema_1",
+ };
+ __u64 *ref_ctr_offset;
struct bpf_link *link;
int link_fd, err;
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, sema,
+ (unsigned long **) &ref_ctr_offset, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ opts.ref_ctr_offset = *ref_ctr_offset;
link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
0, /* self pid */
UPROBE_FILE, uprobe_offset,
&opts);
if (!ASSERT_OK_PTR(link, "attach_uprobe"))
- return;
+ goto out;
link_fd = bpf_link__fd(link);
- err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, *ref_ctr_offset);
ASSERT_OK(err, "verify_perf_link_info");
bpf_link__destroy(link);
+out:
+ free(ref_ctr_offset);
}
static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index cfcc90cb7ffb..08bae13248c4 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -7,39 +7,14 @@
#include "bpf_flow.skel.h"
+#define TEST_NS "flow_dissector_ns"
#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
+#define TEST_NAME_MAX_LEN 64
#ifndef IP_MF
#define IP_MF 0x2000
#endif
-#define CHECK_FLOW_KEYS(desc, got, expected) \
- _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \
- desc, \
- topts.duration, \
- "nhoff=%u/%u " \
- "thoff=%u/%u " \
- "addr_proto=0x%x/0x%x " \
- "is_frag=%u/%u " \
- "is_first_frag=%u/%u " \
- "is_encap=%u/%u " \
- "ip_proto=0x%x/0x%x " \
- "n_proto=0x%x/0x%x " \
- "flow_label=0x%x/0x%x " \
- "sport=%u/%u " \
- "dport=%u/%u\n", \
- got.nhoff, expected.nhoff, \
- got.thoff, expected.thoff, \
- got.addr_proto, expected.addr_proto, \
- got.is_frag, expected.is_frag, \
- got.is_first_frag, expected.is_first_frag, \
- got.is_encap, expected.is_encap, \
- got.ip_proto, expected.ip_proto, \
- got.n_proto, expected.n_proto, \
- got.flow_label, expected.flow_label, \
- got.sport, expected.sport, \
- got.dport, expected.dport)
-
struct ipv4_pkt {
struct ethhdr eth;
struct iphdr iph;
@@ -89,6 +64,19 @@ struct dvlan_ipv6_pkt {
struct tcphdr tcp;
} __packed;
+struct gre_base_hdr {
+ __be16 flags;
+ __be16 protocol;
+};
+
+struct gre_minimal_pkt {
+ struct ethhdr eth;
+ struct iphdr iph;
+ struct gre_base_hdr gre_hdr;
+ struct iphdr iph_inner;
+ struct tcphdr tcp;
+} __packed;
+
struct test {
const char *name;
union {
@@ -98,6 +86,7 @@ struct test {
struct ipv6_pkt ipv6;
struct ipv6_frag_pkt ipv6_frag;
struct dvlan_ipv6_pkt dvlan_ipv6;
+ struct gre_minimal_pkt gre_minimal;
} pkt;
struct bpf_flow_keys keys;
__u32 flags;
@@ -106,7 +95,6 @@ struct test {
#define VLAN_HLEN 4
-static __u32 duration;
struct test tests[] = {
{
.name = "ipv4",
@@ -444,8 +432,137 @@ struct test tests[] = {
},
.retval = BPF_FLOW_DISSECTOR_CONTINUE,
},
+ {
+ .name = "ip-gre",
+ .pkt.gre_minimal = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_GRE,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .gre_hdr = {
+ .flags = 0,
+ .protocol = __bpf_constant_htons(ETH_P_IP),
+ },
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr) * 2 +
+ sizeof(struct gre_base_hdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_encap = true,
+ .sport = 80,
+ .dport = 8080,
+ },
+ .retval = BPF_OK,
+ },
+ {
+ .name = "ip-gre-no-encap",
+ .pkt.ipip = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_GRE,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES -
+ sizeof(struct iphdr)),
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct iphdr)
+ + sizeof(struct gre_base_hdr),
+ .addr_proto = ETH_P_IP,
+ .ip_proto = IPPROTO_GRE,
+ .n_proto = __bpf_constant_htons(ETH_P_IP),
+ .is_encap = true,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .retval = BPF_OK,
+ },
};
+void serial_test_flow_dissector_namespace(void)
+{
+ struct bpf_flow *skel;
+ struct nstoken *ns;
+ int err, prog_fd;
+
+ skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (!ASSERT_OK_FD(prog_fd, "get dissector fd"))
+ goto out_destroy_skel;
+
+ /* We must be able to attach a flow dissector to the root namespace */
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (!ASSERT_OK(err, "attach on root namespace ok"))
+ goto out_destroy_skel;
+
+ err = make_netns(TEST_NS);
+ if (!ASSERT_OK(err, "create non-root net namespace"))
+ goto out_destroy_skel;
+
+ /* We must not be able to additionally attach a flow dissector to a
+ * non-root net namespace
+ */
+ ns = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(ns, "enter non-root net namespace"))
+ goto out_clean_ns;
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (!ASSERT_ERR(err,
+ "refuse new flow dissector in non-root net namespace"))
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ else
+ ASSERT_EQ(errno, EEXIST,
+ "refused because of already attached prog");
+ close_netns(ns);
+
+ /* If no flow dissector is attached to the root namespace, we must
+ * be able to attach one to a non-root net namespace
+ */
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ ns = open_netns(TEST_NS);
+ ASSERT_OK_PTR(ns, "enter non-root net namespace");
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ close_netns(ns);
+ ASSERT_OK(err, "accept new flow dissector in non-root net namespace");
+
+ /* If a flow dissector is attached to a non-root net namespace, attaching
+ * one to the root namespace must fail
+ */
+ err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (!ASSERT_ERR(err, "refuse new flow dissector on root namespace"))
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ else
+ ASSERT_EQ(errno, EEXIST,
+ "refused because of already attached prog");
+
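+ /* Final cleanup: detach the dissector still attached inside the
+ * non-root namespace before removing it.
+ */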
+ ns = open_netns(TEST_NS);
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+ close_netns(ns);
+out_clean_ns:
+ remove_netns(TEST_NS);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
+}
+
static int create_tap(const char *ifname)
{
struct ifreq ifr = {
@@ -533,22 +650,27 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
return 0;
}
-static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys,
+ char *test_suffix)
{
+ char test_name[TEST_NAME_MAX_LEN];
int i, err, keys_fd;
keys_fd = bpf_map__fd(keys);
- if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+ if (!ASSERT_OK_FD(keys_fd, "bpf_map__fd"))
return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
/* Keep in sync with 'flags' from eth_get_headlen. */
__u32 eth_get_headlen_flags =
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- LIBBPF_OPTS(bpf_test_run_opts, topts);
struct bpf_flow_keys flow_keys = {};
__u32 key = (__u32)(tests[i].keys.sport) << 16 |
tests[i].keys.dport;
+ snprintf(test_name, TEST_NAME_MAX_LEN, "%s-%s", tests[i].name,
+ test_suffix);
+ if (!test__start_subtest(test_name))
+ continue;
/* For skb-less case we can't pass input flags; run
* only the tests that have a matching set of flags.
@@ -558,78 +680,139 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
continue;
err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
- CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+ if (!ASSERT_EQ(err, sizeof(tests[i].pkt), "tx_tap"))
+ continue;
/* check the stored flow_keys only if BPF_OK expected */
if (tests[i].retval != BPF_OK)
continue;
err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- ASSERT_OK(err, "bpf_map_lookup_elem");
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ continue;
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+ ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
+ sizeof(struct bpf_flow_keys),
+ "returned flow keys");
err = bpf_map_delete_elem(keys_fd, &key);
ASSERT_OK(err, "bpf_map_delete_elem");
}
}
-static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+void test_flow_dissector_skb_less_direct_attach(void)
{
- int err, prog_fd;
+ int err, prog_fd, tap_fd;
+ struct bpf_flow *skel;
+ struct netns_obj *ns;
- prog_fd = bpf_program__fd(skel->progs._dissect);
- if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+ ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create and open netns"))
return;
+ skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+ goto out_clean_ns;
+
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (!ASSERT_OK(err, "init_prog_array"))
+ goto out_destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
+ goto out_destroy_skel;
+
err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
- if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
- return;
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out_destroy_skel;
+
+ tap_fd = create_tap("tap0");
+ if (!ASSERT_OK_FD(tap_fd, "create_tap"))
+ goto out_destroy_skel;
+ err = ifup("tap0");
+ if (!ASSERT_OK(err, "ifup"))
+ goto out_close_tap;
- run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection,
+ "non-skb-direct-attach");
err = bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
- CHECK(err, "bpf_prog_detach2", "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "bpf_prog_detach2");
+
+out_close_tap:
+ close(tap_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
+out_clean_ns:
+ netns_free(ns);
}
-static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+void test_flow_dissector_skb_less_indirect_attach(void)
{
+ int err, net_fd, tap_fd;
+ struct bpf_flow *skel;
struct bpf_link *link;
- int err, net_fd;
+ struct netns_obj *ns;
- net_fd = open("/proc/self/ns/net", O_RDONLY);
- if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+ ns = netns_new("flow_dissector_skb_less_indirect_attach_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create and open netns"))
return;
+ skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
+ goto out_clean_ns;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_OK_FD(net_fd, "open(/proc/self/ns/net)"))
+ goto out_destroy_skel;
+
+ err = init_prog_array(skel->obj, skel->maps.jmp_table);
+ if (!ASSERT_OK(err, "init_prog_array"))
+ goto out_destroy_skel;
+
+ tap_fd = create_tap("tap0");
+ if (!ASSERT_OK_FD(tap_fd, "create_tap"))
+ goto out_close_ns;
+ err = ifup("tap0");
+ if (!ASSERT_OK(err, "ifup"))
+ goto out_close_tap;
+
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
if (!ASSERT_OK_PTR(link, "attach_netns"))
- goto out_close;
+ goto out_close_tap;
- run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+ run_tests_skb_less(tap_fd, skel->maps.last_dissection,
+ "non-skb-indirect-attach");
err = bpf_link__destroy(link);
- CHECK(err, "bpf_link__destroy", "err %d\n", err);
-out_close:
+ ASSERT_OK(err, "bpf_link__destroy");
+
+out_close_tap:
+ close(tap_fd);
+out_close_ns:
close(net_fd);
+out_destroy_skel:
+ bpf_flow__destroy(skel);
+out_clean_ns:
+ netns_free(ns);
}
-void test_flow_dissector(void)
+void test_flow_dissector_skb(void)
{
- int i, err, prog_fd, keys_fd = -1, tap_fd;
+ char test_name[TEST_NAME_MAX_LEN];
struct bpf_flow *skel;
+ int i, err, prog_fd;
skel = bpf_flow__open_and_load();
- if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "open/load skeleton"))
return;
- prog_fd = bpf_program__fd(skel->progs._dissect);
- if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
- goto out_destroy_skel;
- keys_fd = bpf_map__fd(skel->maps.last_dissection);
- if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
- goto out_destroy_skel;
err = init_prog_array(skel->obj, skel->maps.jmp_table);
- if (CHECK(err, "init_prog_array", "err %d\n", err))
+ if (!ASSERT_OK(err, "init_prog_array"))
+ goto out_destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (!ASSERT_OK_FD(prog_fd, "bpf_program__fd"))
goto out_destroy_skel;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
@@ -641,6 +824,10 @@ void test_flow_dissector(void)
);
static struct bpf_flow_keys ctx = {};
+ snprintf(test_name, TEST_NAME_MAX_LEN, "%s-skb", tests[i].name);
+ if (!test__start_subtest(test_name))
+ continue;
+
if (tests[i].flags) {
topts.ctx_in = &ctx;
topts.ctx_size_in = sizeof(ctx);
@@ -656,26 +843,12 @@ void test_flow_dissector(void)
continue;
ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
"test_run data_size_out");
- CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+ ASSERT_MEMEQ(&flow_keys, &tests[i].keys,
+ sizeof(struct bpf_flow_keys),
+ "returned flow keys");
}
- /* Do the same tests but for skb-less flow dissector.
- * We use a known path in the net/tun driver that calls
- * eth_get_headlen and we manually export bpf_flow_keys
- * via BPF map in this case.
- */
-
- tap_fd = create_tap("tap0");
- CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
- err = ifup("tap0");
- CHECK(err, "ifup", "err %d errno %d\n", err, errno);
-
- /* Test direct prog attachment */
- test_skb_less_prog_attach(skel, tap_fd);
- /* Test indirect prog attachment via link */
- test_skb_less_link_create(skel, tap_fd);
-
- close(tap_fd);
out_destroy_skel:
bpf_flow__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
new file mode 100644
index 000000000000..80b153d3ddec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_classification.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <bpf/bpf.h>
+#include <linux/bpf.h>
+#include <bpf/libbpf.h>
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "bpf_util.h"
+#include "bpf_flow.skel.h"
+
+#define CFG_PORT_INNER 8000
+#define CFG_PORT_GUE 6080
+#define SUBTEST_NAME_MAX_LEN 32
+#define TEST_NAME_MAX_LEN (32 + SUBTEST_NAME_MAX_LEN)
+#define MAX_SOURCE_PORTS 3
+#define TEST_PACKETS_COUNT 10
+#define TEST_PACKET_LEN 100
+#define TEST_PACKET_PATTERN 'a'
+#define TEST_IPV4 "192.168.0.1/32"
+#define TEST_IPV6 "100::a/128"
+#define TEST_TUNNEL_REMOTE "127.0.0.2"
+#define TEST_TUNNEL_LOCAL "127.0.0.1"
+
+#define INIT_ADDR4(addr4, port) \
+ { \
+ .sin_family = AF_INET, \
+ .sin_port = __constant_htons(port), \
+ .sin_addr.s_addr = __constant_htonl(addr4), \
+ }
+
+#define INIT_ADDR6(addr6, port) \
+ { \
+ .sin6_family = AF_INET6, \
+ .sin6_port = __constant_htons(port), \
+ .sin6_addr = addr6, \
+ }
+#define TEST_IN4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 2, 0)
+#define TEST_IN4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, CFG_PORT_INNER)
+#define TEST_OUT4_SRC_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK + 1, 0)
+#define TEST_OUT4_DST_ADDR_DEFAULT INIT_ADDR4(INADDR_LOOPBACK, 0)
+
+#define TEST_IN6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+#define TEST_IN6_DST_ADDR_DEFAULT \
+ INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
+#define TEST_OUT6_SRC_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+#define TEST_OUT6_DST_ADDR_DEFAULT INIT_ADDR6(IN6ADDR_LOOPBACK_INIT, 0)
+
+#define TEST_IN4_SRC_ADDR_DISSECT_CONTINUE INIT_ADDR4(INADDR_LOOPBACK + 126, 0)
+#define TEST_IN4_SRC_ADDR_IPIP INIT_ADDR4((in_addr_t)0x01010101, 0)
+#define TEST_IN4_DST_ADDR_IPIP INIT_ADDR4((in_addr_t)0xC0A80001, CFG_PORT_INNER)
+
+struct grehdr {
+ uint16_t unused;
+ uint16_t protocol;
+} __packed;
+
+struct guehdr {
+ union {
+ struct {
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u8 hlen : 5, control : 1, version : 2;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u8 version : 2, control : 1, hlen : 5;
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+ __u8 proto_ctype;
+ __be16 flags;
+ };
+ __be32 word;
+ };
+};
+
+static char buf[ETH_DATA_LEN];
+
+struct test_configuration {
+ char name[SUBTEST_NAME_MAX_LEN];
+ int (*test_setup)(void);
+ void (*test_teardown)(void);
+ int source_ports[MAX_SOURCE_PORTS];
+ int cfg_l3_inner;
+ struct sockaddr_in in_saddr4;
+ struct sockaddr_in in_daddr4;
+ struct sockaddr_in6 in_saddr6;
+ struct sockaddr_in6 in_daddr6;
+ int cfg_l3_outer;
+ struct sockaddr_in out_saddr4;
+ struct sockaddr_in out_daddr4;
+ struct sockaddr_in6 out_saddr6;
+ struct sockaddr_in6 out_daddr6;
+ int cfg_encap_proto;
+ uint8_t cfg_dsfield_inner;
+ uint8_t cfg_dsfield_outer;
+ int cfg_l3_extra;
+ struct sockaddr_in extra_saddr4;
+ struct sockaddr_in extra_daddr4;
+ struct sockaddr_in6 extra_saddr6;
+ struct sockaddr_in6 extra_daddr6;
+};
+
+static unsigned long util_gettime(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void build_ipv4_header(void *header, uint8_t proto, uint32_t src,
+ uint32_t dst, int payload_len, uint8_t tos)
+{
+ struct iphdr *iph = header;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->tos = tos;
+ iph->ttl = 8;
+ iph->tot_len = htons(sizeof(*iph) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = proto;
+ iph->saddr = src;
+ iph->daddr = dst;
+ iph->check = build_ip_csum((void *)iph);
+}
+
+static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
+{
+ uint16_t val, *ptr = (uint16_t *)ip6h;
+
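+ /* The 8-bit traffic class straddles the first two header bytes
+ * (version:4 | tc:8 | flow label:4), hence the 0xF00F mask and the
+ * 4-bit shift below.
+ */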
+ val = ntohs(*ptr);
+ val &= 0xF00F;
+ val |= ((uint16_t)dsfield) << 4;
+ *ptr = htons(val);
+}
+
+static void build_ipv6_header(void *header, uint8_t proto,
+ const struct sockaddr_in6 *src,
+ const struct sockaddr_in6 *dst, int payload_len,
+ uint8_t dsfield)
+{
+ struct ipv6hdr *ip6h = header;
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(payload_len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 8;
+ ipv6_set_dsfield(ip6h, dsfield);
+
+ memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
+ memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
+}
+
+static void build_udp_header(void *header, int payload_len, uint16_t sport,
+ uint16_t dport, int family)
+{
+ struct udphdr *udph = header;
+ int len = sizeof(*udph) + payload_len;
+
+ udph->source = htons(sport);
+ udph->dest = htons(dport);
+ udph->len = htons(len);
+ udph->check = 0;
+ if (family == AF_INET)
+ udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
+ udph);
+ else
+ udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
+ udph);
+}
+
+static void build_gue_header(void *header, uint8_t proto)
+{
+ struct guehdr *gueh = header;
+
+ gueh->proto_ctype = proto;
+}
+
+static void build_gre_header(void *header, uint16_t proto)
+{
+ struct grehdr *greh = header;
+
+ greh->protocol = htons(proto);
+}
+
+static int l3_length(int family)
+{
+ if (family == AF_INET)
+ return sizeof(struct iphdr);
+ else
+ return sizeof(struct ipv6hdr);
+}
+
+static int build_packet(const struct test_configuration *test, uint16_t sport)
+{
+ int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
+ int el3_len = 0, packet_len;
+
+ memset(buf, 0, ETH_DATA_LEN);
+
+ if (test->cfg_l3_extra)
+ el3_len = l3_length(test->cfg_l3_extra);
+
+ /* calculate header offsets */
+ if (test->cfg_encap_proto) {
+ ol3_len = l3_length(test->cfg_l3_outer);
+
+ if (test->cfg_encap_proto == IPPROTO_GRE)
+ ol4_len = sizeof(struct grehdr);
+ else if (test->cfg_encap_proto == IPPROTO_UDP)
+ ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
+ }
+
+ il3_len = l3_length(test->cfg_l3_inner);
+ il4_len = sizeof(struct udphdr);
+
+ packet_len = el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN;
+ if (!ASSERT_LE(packet_len, sizeof(buf), "check packet size"))
+ return -1;
+
+ /*
+ * Fill the packet from the inside out, to calculate correct checksums.
+ * But build the IP header before the UDP header, as UDP uses the IP
+ * addresses for its pseudo-header checksum.
+ */
+ memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
+ TEST_PACKET_PATTERN, TEST_PACKET_LEN);
+
+ /* add zero byte for udp csum padding */
+ buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + TEST_PACKET_LEN] =
+ 0;
+
+ switch (test->cfg_l3_inner) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP, test->in_saddr4.sin_addr.s_addr,
+ test->in_daddr4.sin_addr.s_addr,
+ il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_inner);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
+ IPPROTO_UDP, &test->in_saddr6,
+ &test->in_daddr6, il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_inner);
+ break;
+ }
+
+ build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
+ TEST_PACKET_LEN, sport, CFG_PORT_INNER,
+ test->cfg_l3_inner);
+
+ if (!test->cfg_encap_proto)
+ return il3_len + il4_len + TEST_PACKET_LEN;
+
+ switch (test->cfg_l3_outer) {
+ case PF_INET:
+ build_ipv4_header(buf + el3_len, test->cfg_encap_proto,
+ test->out_saddr4.sin_addr.s_addr,
+ test->out_daddr4.sin_addr.s_addr,
+ ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_outer);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf + el3_len, test->cfg_encap_proto,
+ &test->out_saddr6, &test->out_daddr6,
+ ol4_len + il3_len + il4_len + TEST_PACKET_LEN,
+ test->cfg_dsfield_outer);
+ break;
+ }
+
+ switch (test->cfg_encap_proto) {
+ case IPPROTO_UDP:
+ build_gue_header(buf + el3_len + ol3_len + ol4_len -
+ sizeof(struct guehdr),
+ test->cfg_l3_inner == PF_INET ? IPPROTO_IPIP :
+ IPPROTO_IPV6);
+ build_udp_header(buf + el3_len + ol3_len,
+ sizeof(struct guehdr) + il3_len + il4_len +
+ TEST_PACKET_LEN,
+ sport, CFG_PORT_GUE, test->cfg_l3_outer);
+ break;
+ case IPPROTO_GRE:
+ build_gre_header(buf + el3_len + ol3_len,
+ test->cfg_l3_inner == PF_INET ? ETH_P_IP :
+ ETH_P_IPV6);
+ break;
+ }
+
+ switch (test->cfg_l3_extra) {
+ case PF_INET:
+ build_ipv4_header(buf,
+ test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
+ IPPROTO_IPV6,
+ test->extra_saddr4.sin_addr.s_addr,
+ test->extra_daddr4.sin_addr.s_addr,
+ ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN,
+ 0);
+ break;
+ case PF_INET6:
+ build_ipv6_header(buf,
+ test->cfg_l3_outer == PF_INET ? IPPROTO_IPIP :
+ IPPROTO_IPV6,
+ &test->extra_saddr6, &test->extra_daddr6,
+ ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN,
+ 0);
+ break;
+ }
+
+ return el3_len + ol3_len + ol4_len + il3_len + il4_len +
+ TEST_PACKET_LEN;
+}
+
+/* The sender always transmits over a raw socket: encapsulated if an outer
+ * protocol is configured, otherwise the plain inner UDP packet.
+ */
+static int setup_tx(const struct test_configuration *test)
+{
+ int family, fd, ret;
+
+ if (test->cfg_l3_extra)
+ family = test->cfg_l3_extra;
+ else if (test->cfg_l3_outer)
+ family = test->cfg_l3_outer;
+ else
+ family = test->cfg_l3_inner;
+
+ fd = socket(family, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_OK_FD(fd, "setup tx socket"))
+ return fd;
+
+ if (test->cfg_l3_extra) {
+ if (test->cfg_l3_extra == PF_INET)
+ ret = connect(fd, (void *)&test->extra_daddr4,
+ sizeof(test->extra_daddr4));
+ else
+ ret = connect(fd, (void *)&test->extra_daddr6,
+ sizeof(test->extra_daddr6));
+ if (!ASSERT_OK(ret, "connect")) {
+ close(fd);
+ return ret;
+ }
+ } else if (test->cfg_l3_outer) {
+ /* connect to the outer destination when there is no extra header */
+ if (test->cfg_l3_outer == PF_INET)
+ ret = connect(fd, (void *)&test->out_daddr4,
+ sizeof(test->out_daddr4));
+ else
+ ret = connect(fd, (void *)&test->out_daddr6,
+ sizeof(test->out_daddr6));
+ if (!ASSERT_OK(ret, "connect")) {
+ close(fd);
+ return ret;
+ }
+ } else {
+ /* otherwise using loopback */
+ if (test->cfg_l3_inner == PF_INET)
+ ret = connect(fd, (void *)&test->in_daddr4,
+ sizeof(test->in_daddr4));
+ else
+ ret = connect(fd, (void *)&test->in_daddr6,
+ sizeof(test->in_daddr6));
+ if (!ASSERT_OK(ret, "connect")) {
+ close(fd);
+ return ret;
+ }
+ }
+
+ return fd;
+}
+
+/* receiver reads unencapsulated UDP */
+static int setup_rx(const struct test_configuration *test)
+{
+ int fd, ret;
+
+ fd = socket(test->cfg_l3_inner, SOCK_DGRAM, 0);
+ if (!ASSERT_OK_FD(fd, "socket rx"))
+ return fd;
+
+ if (test->cfg_l3_inner == PF_INET)
+ ret = bind(fd, (void *)&test->in_daddr4,
+ sizeof(test->in_daddr4));
+ else
+ ret = bind(fd, (void *)&test->in_daddr6,
+ sizeof(test->in_daddr6));
+ if (!ASSERT_OK(ret, "bind rx")) {
+ close(fd);
+ return ret;
+ }
+
+ return fd;
+}
+
+static int do_tx(int fd, const char *pkt, int len)
+{
+ int ret;
+
+ ret = write(fd, pkt, len);
+ return ret != len;
+}
+
+static int do_poll(int fd, short events, int timeout)
+{
+ struct pollfd pfd;
+ int ret;
+
+ pfd.fd = fd;
+ pfd.events = events;
+
+ ret = poll(&pfd, 1, timeout);
+ return ret;
+}
+
+static int do_rx(int fd)
+{
+ char rbuf;
+ int ret, num = 0;
+
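+ /* Each 1-byte recv() consumes one whole UDP datagram, so num ends up
+ * counting received packets.
+ */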
+ while (1) {
+ ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret < 0)
+ return -1;
+ if (!ASSERT_EQ(rbuf, TEST_PACKET_PATTERN, "check pkt pattern"))
+ return -1;
+ num++;
+ }
+
+ return num;
+}
+
+static int run_test(const struct test_configuration *test,
+ int source_port_index)
+{
+ int fdt = -1, fdr = -1, len, tx = 0, rx = 0, err;
+ unsigned long tstop, tcur;
+
+ fdr = setup_rx(test);
+ fdt = setup_tx(test);
+ if (!ASSERT_OK_FD(fdr, "setup rx") || !ASSERT_OK_FD(fdt, "setup tx")) {
+ err = -1;
+ goto out_close_sockets;
+ }
+
+ len = build_packet(test,
+ (uint16_t)test->source_ports[source_port_index]);
+ if (!ASSERT_GT(len, 0, "build test packet"))
+ goto out_close_sockets;
+
+ tcur = util_gettime();
+ tstop = tcur;
+
+ while (tx < TEST_PACKETS_COUNT) {
+ if (!ASSERT_OK(do_tx(fdt, buf, len), "do_tx"))
+ break;
+ tx++;
+ err = do_rx(fdr);
+ if (!ASSERT_GE(err, 0, "do_rx"))
+ break;
+ rx += err;
+ }
+
+ /* read straggler packets, if any */
+ if (rx < tx) {
+ tstop = util_gettime() + 100;
+ while (rx < tx) {
+ tcur = util_gettime();
+ if (tcur >= tstop)
+ break;
+
+ err = do_poll(fdr, POLLIN, tstop - tcur);
+ if (err < 0)
+ break;
+ err = do_rx(fdr);
+ if (err >= 0)
+ rx += err;
+ }
+ }
+
+out_close_sockets:
+ close(fdt);
+ close(fdr);
+ return rx;
+}
+
+static int attach_and_configure_program(struct bpf_flow *skel)
+{
+ struct bpf_map *prog_array = skel->maps.jmp_table;
+ int main_prog_fd, sub_prog_fd, map_fd, i, err;
+ struct bpf_program *prog;
+ char prog_name[32];
+
+ main_prog_fd = bpf_program__fd(skel->progs._dissect);
+ if (main_prog_fd < 0)
+ return main_prog_fd;
+
+ err = bpf_prog_attach(main_prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+ if (err)
+ return err;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (map_fd < 0)
+ return map_fd;
+
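+ /* Populate the jmp_table program array with the flow_dissector_<i>
+ * sub-programs so the main dissector can tail-call into them.
+ */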
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!prog)
+ return -1;
+
+ sub_prog_fd = bpf_program__fd(prog);
+ if (sub_prog_fd < 0)
+ return -1;
+
+ err = bpf_map_update_elem(map_fd, &i, &sub_prog_fd, BPF_ANY);
+ if (err)
+ return -1;
+ }
+
+ return main_prog_fd;
+}
+
+static void detach_program(struct bpf_flow *skel, int prog_fd)
+{
+ bpf_prog_detach2(prog_fd, 0, BPF_FLOW_DISSECTOR);
+}
+
+static int set_port_drop(int pf, bool multi_port)
+{
+ char dst_port[16];
+
+ snprintf(dst_port, sizeof(dst_port), "%d", CFG_PORT_INNER);
+
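+ /* Install a flower filter on lo that drops UDP packets coming from the
+ * "blocked" source port(s); receiving or losing packets is how each
+ * subtest verifies the dissector's classification.
+ */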
+ SYS(fail, "tc qdisc add dev lo ingress");
+ SYS(fail_delete_qdisc, "tc filter add %s %s %s %s %s %s %s %s %s %s %s %s",
+ "dev lo",
+ "parent FFFF:",
+ "protocol", pf == PF_INET6 ? "ipv6" : "ip",
+ "pref 1337",
+ "flower",
+ "ip_proto udp",
+ "src_port", multi_port ? "8-10" : "9",
+ "dst_port", dst_port,
+ "action drop");
+ return 0;
+
+fail_delete_qdisc:
+ SYS_NOFAIL("tc qdisc del dev lo ingress");
+fail:
+ return 1;
+}
+
+static void remove_filter(void)
+{
+ SYS_NOFAIL("tc filter del dev lo ingress");
+ SYS_NOFAIL("tc qdisc del dev lo ingress");
+}
+
+static int ipv4_setup(void)
+{
+ return set_port_drop(PF_INET, false);
+}
+
+static int ipv6_setup(void)
+{
+ return set_port_drop(PF_INET6, false);
+}
+
+static int port_range_setup(void)
+{
+ return set_port_drop(PF_INET, true);
+}
+
+static int set_addresses(void)
+{
+ SYS(out, "ip -4 addr add %s dev lo", TEST_IPV4);
+ SYS(out_remove_ipv4, "ip -6 addr add %s dev lo", TEST_IPV6);
+ return 0;
+out_remove_ipv4:
+ SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
+out:
+ return -1;
+}
+
+static void unset_addresses(void)
+{
+ SYS_NOFAIL("ip -4 addr del %s dev lo", TEST_IPV4);
+ SYS_NOFAIL("ip -6 addr del %s dev lo", TEST_IPV6);
+}
+
+static int ipip_setup(void)
+{
+ if (!ASSERT_OK(set_addresses(), "configure addresses"))
+ return -1;
+ if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
+ goto out_unset_addresses;
+ SYS(out_remove_filter,
+ "ip link add ipip_test type ipip remote %s local %s dev lo",
+ TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
+ SYS(out_clean_netif, "ip link set ipip_test up");
+ return 0;
+
+out_clean_netif:
+ SYS_NOFAIL("ip link del ipip_test");
+out_remove_filter:
+ remove_filter();
+out_unset_addresses:
+ unset_addresses();
+ return -1;
+}
+
+static void ipip_shutdown(void)
+{
+ SYS_NOFAIL("ip link del ipip_test");
+ remove_filter();
+ unset_addresses();
+}
+
+static int gre_setup(void)
+{
+ if (!ASSERT_OK(set_addresses(), "configure addresses"))
+ return -1;
+ if (!ASSERT_OK(set_port_drop(PF_INET, false), "set filter"))
+ goto out_unset_addresses;
+ SYS(out_remove_filter,
+ "ip link add gre_test type gre remote %s local %s dev lo",
+ TEST_TUNNEL_REMOTE, TEST_TUNNEL_LOCAL);
+ SYS(out_clean_netif, "ip link set gre_test up");
+ return 0;
+
+out_clean_netif:
+ SYS_NOFAIL("ip link del ipip_test");
+out_remove_filter:
+ remove_filter();
+out_unset_addresses:
+ unset_addresses();
+ return -1;
+}
+
+static void gre_shutdown(void)
+{
+ SYS_NOFAIL("ip link del gre_test");
+ remove_filter();
+ unset_addresses();
+}
+
+static const struct test_configuration tests_input[] = {
+ {
+ .name = "ipv4",
+ .test_setup = ipv4_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
+ .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT
+ },
+ {
+ .name = "ipv4_continue_dissect",
+ .test_setup = ipv4_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_DISSECT_CONTINUE,
+ .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT
+ },
+ {
+ .name = "ipip",
+ .test_setup = ipip_setup,
+ .test_teardown = ipip_shutdown,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
+ .in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
+ .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
+ .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
+ .cfg_l3_outer = PF_INET,
+ .cfg_encap_proto = IPPROTO_IPIP,
+ },
+ {
+ .name = "gre",
+ .test_setup = gre_setup,
+ .test_teardown = gre_shutdown,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_IPIP,
+ .in_daddr4 = TEST_IN4_DST_ADDR_IPIP,
+ .out_saddr4 = TEST_OUT4_SRC_ADDR_DEFAULT,
+ .out_daddr4 = TEST_OUT4_DST_ADDR_DEFAULT,
+ .cfg_l3_outer = PF_INET,
+ .cfg_encap_proto = IPPROTO_GRE,
+ },
+ {
+ .name = "port_range",
+ .test_setup = port_range_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 7, 9, 11 },
+ .cfg_l3_inner = PF_INET,
+ .in_saddr4 = TEST_IN4_SRC_ADDR_DEFAULT,
+ .in_daddr4 = TEST_IN4_DST_ADDR_DEFAULT
+ },
+ {
+ .name = "ipv6",
+ .test_setup = ipv6_setup,
+ .test_teardown = remove_filter,
+ .source_ports = { 8, 9, 10 },
+ .cfg_l3_inner = PF_INET6,
+ .in_saddr6 = TEST_IN6_SRC_ADDR_DEFAULT,
+ .in_daddr6 = TEST_IN6_DST_ADDR_DEFAULT
+ },
+};
+
+struct test_ctx {
+ struct bpf_flow *skel;
+ struct netns_obj *ns;
+ int prog_fd;
+};
+
+static int test_global_init(struct test_ctx *ctx)
+{
+ int err;
+
+ ctx->skel = bpf_flow__open_and_load();
+ if (!ASSERT_OK_PTR(ctx->skel, "open and load flow_dissector"))
+ return -1;
+
+ ctx->ns = netns_new("flow_dissector_classification", true);
+ if (!ASSERT_OK_PTR(ctx->ns, "switch ns"))
+ goto out_destroy_skel;
+
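+ /* Disable reverse-path filtering so the raw packets injected on lo with
+ * non-local source addresses are not dropped before reaching the
+ * receiver.
+ */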
+ err = write_sysctl("/proc/sys/net/ipv4/conf/default/rp_filter", "0");
+ err |= write_sysctl("/proc/sys/net/ipv4/conf/all/rp_filter", "0");
+ err |= write_sysctl("/proc/sys/net/ipv4/conf/lo/rp_filter", "0");
+ if (!ASSERT_OK(err, "configure net tunables"))
+ goto out_clean_ns;
+
+ ctx->prog_fd = attach_and_configure_program(ctx->skel);
+ if (!ASSERT_OK_FD(ctx->prog_fd, "attach and configure program"))
+ goto out_clean_ns;
+ return 0;
+out_clean_ns:
+ netns_free(ctx->ns);
+out_destroy_skel:
+ bpf_flow__destroy(ctx->skel);
+ return -1;
+}
+
+static void test_global_shutdown(struct test_ctx *ctx)
+{
+ detach_program(ctx->skel, ctx->prog_fd);
+ netns_free(ctx->ns);
+ bpf_flow__destroy(ctx->skel);
+}
+
+void test_flow_dissector_classification(void)
+{
+ struct test_ctx ctx;
+ const struct test_configuration *test;
+ int i;
+
+ if (test_global_init(&ctx))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests_input); i++) {
+ if (!test__start_subtest(tests_input[i].name))
+ continue;
+ test = &tests_input[i];
+ /* Every test uses three source ports: packets from the first and
+ * third must be received, while packets from the second must be
+ * dropped by the filter installed in test_setup().
+ */
+ if (test->test_setup &&
+ !ASSERT_OK(test->test_setup(), "init filter"))
+ continue;
+
+ ASSERT_EQ(run_test(test, 0), TEST_PACKETS_COUNT,
+ "test first port");
+ ASSERT_EQ(run_test(test, 1), 0, "test second port");
+ ASSERT_EQ(run_test(test, 2), TEST_PACKETS_COUNT,
+ "test third port");
+ if (test->test_teardown)
+ test->test_teardown();
+ }
+ test_global_shutdown(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 09f6487f58b9..5fea3209566e 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -6,6 +6,7 @@
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
#include "for_each_multi_maps.skel.h"
+#include "for_each_hash_modify.skel.h"
static unsigned int duration;
@@ -203,6 +204,40 @@ out:
for_each_multi_maps__destroy(skel);
}
+static void test_hash_modify(void)
+{
+ struct for_each_hash_modify *skel;
+ int max_entries, i, err;
+ __u64 key, val;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1
+ );
+
+ skel = for_each_hash_modify__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
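+ /* Run the program; per the skeleton name it modifies the hash map from
+ * within a bpf_for_each_map_elem() callback, and a zero retval means
+ * those modifications succeeded.
+ */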
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+
+out:
+ for_each_hash_modify__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -213,4 +248,6 @@ void test_for_each(void)
test_write_map_key();
if (test__start_subtest("multi_maps"))
test_multi_maps();
+ if (test__start_subtest("hash_modify"))
+ test_hash_modify();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/free_timer.c b/tools/testing/selftests/bpf/prog_tests/free_timer.c
new file mode 100644
index 000000000000..0de8facca4c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/free_timer.c
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+
+#include "free_timer.skel.h"
+
+struct run_ctx {
+ struct bpf_program *start_prog;
+ struct bpf_program *overwrite_prog;
+ pthread_barrier_t notify;
+ int loop;
+ bool start;
+ bool stop;
+};
+
+static void start_threads(struct run_ctx *ctx)
+{
+ ctx->start = true;
+}
+
+static void stop_threads(struct run_ctx *ctx)
+{
+ ctx->stop = true;
+ /* Guarantee the order between ->stop and ->start */
+ __atomic_store_n(&ctx->start, true, __ATOMIC_RELEASE);
+}
+
+static int wait_for_start(struct run_ctx *ctx)
+{
+ while (!__atomic_load_n(&ctx->start, __ATOMIC_ACQUIRE))
+ usleep(10);
+
+ return ctx->stop;
+}
+
+static void *overwrite_timer_fn(void *arg)
+{
+ struct run_ctx *ctx = arg;
+ int loop, fd, err;
+ cpu_set_t cpuset;
+ long ret = 0;
+
+ /* Pin on CPU 0 */
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ /* Is the thread being stopped? */
+ err = wait_for_start(ctx);
+ if (err)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->overwrite_prog);
+ loop = ctx->loop;
+ while (loop-- > 0) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ /* Wait for start thread to complete */
+ pthread_barrier_wait(&ctx->notify);
+
+ /* Overwrite timers */
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err)
+ ret |= 1;
+ else if (opts.retval)
+ ret |= 2;
+
+ /* Notify start thread to start timers */
+ pthread_barrier_wait(&ctx->notify);
+ }
+
+ return (void *)ret;
+}
+
+static void *start_timer_fn(void *arg)
+{
+ struct run_ctx *ctx = arg;
+ int loop, fd, err;
+ cpu_set_t cpuset;
+ long ret = 0;
+
+ /* Pin on CPU 1 */
+ CPU_ZERO(&cpuset);
+ CPU_SET(1, &cpuset);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
+
+ /* Is the thread being stopped? */
+ err = wait_for_start(ctx);
+ if (err)
+ return NULL;
+
+ fd = bpf_program__fd(ctx->start_prog);
+ loop = ctx->loop;
+ while (loop-- > 0) {
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ /* Run the prog to start timer */
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (err)
+ ret |= 4;
+ else if (opts.retval)
+ ret |= 8;
+
+ /* Notify overwrite thread to do overwrite */
+ pthread_barrier_wait(&ctx->notify);
+
+ /* Wait for overwrite thread to complete */
+ pthread_barrier_wait(&ctx->notify);
+ }
+
+ return (void *)ret;
+}
+
+void test_free_timer(void)
+{
+ struct free_timer *skel;
+ struct bpf_program *prog;
+ struct run_ctx ctx;
+ pthread_t tid[2];
+ void *ret;
+ int err;
+
+ skel = free_timer__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ prog = bpf_object__find_program_by_name(skel->obj, "start_timer");
+ if (!ASSERT_OK_PTR(prog, "find start prog"))
+ goto out;
+ ctx.start_prog = prog;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "overwrite_timer");
+ if (!ASSERT_OK_PTR(prog, "find overwrite prog"))
+ goto out;
+ ctx.overwrite_prog = prog;
+
+ pthread_barrier_init(&ctx.notify, NULL, 2);
+ ctx.loop = 10;
+
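+ /* The two threads alternate through the barrier: one arms timers, the
+ * other overwrites them, racing timer start against timer free.
+ */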
+ err = pthread_create(&tid[0], NULL, start_timer_fn, &ctx);
+ if (!ASSERT_OK(err, "create start_timer"))
+ goto out;
+
+ err = pthread_create(&tid[1], NULL, overwrite_timer_fn, &ctx);
+ if (!ASSERT_OK(err, "create overwrite_timer")) {
+ stop_threads(&ctx);
+ goto out;
+ }
+
+ start_threads(&ctx);
+
+ ret = NULL;
+ err = pthread_join(tid[0], &ret);
+ ASSERT_EQ(err | (long)ret, 0, "start_timer");
+ ret = NULL;
+ err = pthread_join(tid[1], &ret);
+ ASSERT_EQ(err | (long)ret, 0, "overwrite_timer");
+out:
+ free_timer__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
index 5a0b51157451..43a26ec69a8e 100644
--- a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -8,11 +8,12 @@
#include <unistd.h>
#include <test_progs.h>
#include "test_get_xattr.skel.h"
+#include "test_set_remove_xattr.skel.h"
#include "test_fsverity.skel.h"
static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
-static void test_xattr(void)
+static void test_get_xattr(const char *name, const char *value, bool allow_access)
{
struct test_get_xattr *skel = NULL;
int fd = -1, err;
@@ -25,7 +26,7 @@ static void test_xattr(void)
close(fd);
fd = -1;
- err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0);
+ err = setxattr(testfile, name, value, strlen(value) + 1, 0);
if (err && errno == EOPNOTSUPP) {
printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
"To run this test, make sure /tmp filesystem supports xattr.\n",
@@ -48,16 +49,23 @@ static void test_xattr(void)
goto out;
fd = open(testfile, O_RDONLY, 0644);
+
if (!ASSERT_GE(fd, 0, "open_file"))
goto out;
- ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
-
/* Trigger security_inode_getxattr */
- err = getxattr(testfile, "user.kfuncs", v, sizeof(v));
- ASSERT_EQ(err, -1, "getxattr_return");
- ASSERT_EQ(errno, EINVAL, "getxattr_errno");
- ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ err = getxattr(testfile, name, v, sizeof(v));
+
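+ /* For allowed names the BPF program finds the xattr and the getxattr
+ * is rejected with -EINVAL; for disallowed names nothing is found and
+ * the getxattr succeeds normally.
+ */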
+ if (allow_access) {
+ ASSERT_EQ(err, -1, "getxattr_return");
+ ASSERT_EQ(errno, EINVAL, "getxattr_errno");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 1, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 1, "found_xattr_from_dentry");
+ } else {
+ ASSERT_EQ(err, strlen(value) + 1, "getxattr_return");
+ ASSERT_EQ(skel->bss->found_xattr_from_file, 0, "found_xattr_from_file");
+ ASSERT_EQ(skel->bss->found_xattr_from_dentry, 0, "found_xattr_from_dentry");
+ }
out:
close(fd);
@@ -65,6 +73,127 @@ out:
remove(testfile);
}
+/* xattr value we will set to security.bpf.foo */
+static const char value_foo[] = "hello";
+
+static void read_and_validate_foo(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_foo, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(value_foo), "getxattr size foo");
+ ASSERT_EQ(strncmp(value_out, value_foo, sizeof(value_foo)), 0, "strncmp value_foo");
+}
+
+static void set_foo(struct test_set_remove_xattr *skel)
+{
+ ASSERT_OK(setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0),
+ "setxattr foo");
+}
+
+static void validate_bar_match(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_EQ(err, sizeof(skel->data->value_bar), "getxattr size bar");
+ ASSERT_EQ(strncmp(value_out, skel->data->value_bar, sizeof(skel->data->value_bar)), 0,
+ "strncmp value_bar");
+}
+
+static void validate_bar_removed(struct test_set_remove_xattr *skel)
+{
+ char value_out[32];
+ int err;
+
+ err = getxattr(testfile, skel->rodata->xattr_bar, value_out, sizeof(value_out));
+ ASSERT_LT(err, 0, "getxattr size bar should fail");
+}
+
+static void test_set_remove_xattr(void)
+{
+ struct test_set_remove_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ skel = test_set_remove_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_set_remove_xattr__open_and_load"))
+ return;
+
+ /* Set security.bpf.foo to "hello" */
+ err = setxattr(testfile, skel->rodata->xattr_foo, value_foo, strlen(value_foo) + 1, 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_set_remove_xattr__attach(skel);
+ if (!ASSERT_OK(err, "test_set_remove_xattr__attach"))
+ goto out;
+
+ /* First, test the non-_locked version of the kfuncs, using getxattr. */
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ read_and_validate_foo(skel);
+ validate_bar_match(skel);
+
+ /* Read security.bpf.foo and trigger test_inode_getxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ read_and_validate_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->set_security_bpf_bar_success, "set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->remove_security_bpf_bar_success, "remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->set_security_selinux_fail, "set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->remove_security_selinux_fail, "remove_security_selinux_fail");
+
+ /* Second, test _locked version of the kfuncs, with setxattr */
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr. This
+ * bpf program will set security.bpf.bar to "world".
+ */
+ set_foo(skel);
+ validate_bar_match(skel);
+
+ /* Set security.bpf.foo and trigger test_inode_setxattr again.
+ * This will remove xattr security.bpf.bar.
+ */
+ set_foo(skel);
+ validate_bar_removed(skel);
+
+ ASSERT_TRUE(skel->bss->locked_set_security_bpf_bar_success,
+ "locked_set_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_remove_security_bpf_bar_success,
+ "locked_remove_security_bpf_bar_success");
+ ASSERT_TRUE(skel->bss->locked_set_security_selinux_fail,
+ "locked_set_security_selinux_fail");
+ ASSERT_TRUE(skel->bss->locked_remove_security_selinux_fail,
+ "locked_remove_security_selinux_fail");
+
+out:
+ close(fd);
+ test_set_remove_xattr__destroy(skel);
+ remove(testfile);
+}
+
#ifndef SHA256_DIGEST_SIZE
#define SHA256_DIGEST_SIZE 32
#endif
@@ -141,8 +270,21 @@ out:
void test_fs_kfuncs(void)
{
- if (test__start_subtest("xattr"))
- test_xattr();
+ /* Matches xattr_names in progs/test_get_xattr.c */
+ if (test__start_subtest("user_xattr"))
+ test_get_xattr("user.kfuncs", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr"))
+ test_get_xattr("security.bpf.xxx", "hello", true);
+
+ if (test__start_subtest("security_bpf_xattr_error"))
+ test_get_xattr("security.bpf", "hello", false);
+
+ if (test__start_subtest("security_selinux_xattr_error"))
+ test_get_xattr("security.selinux", "hello", false);
+
+ if (test__start_subtest("set_remove_xattr"))
+ test_set_remove_xattr();
if (test__start_subtest("fsverity"))
test_fsverity();
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
index 2bc85f4814f4..d0b405eb2966 100644
--- a/tools/testing/selftests/bpf/prog_tests/htab_update.c
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -15,17 +15,17 @@ struct htab_update_ctx {
static void test_reenter_update(void)
{
struct htab_update *skel;
- unsigned int key, value;
+ void *value = NULL;
+ unsigned int key, value_size;
int err;
skel = htab_update__open();
if (!ASSERT_OK_PTR(skel, "htab_update__open"))
return;
- /* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
- bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
+ bpf_program__set_autoload(skel->progs.bpf_obj_free_fields, true);
err = htab_update__load(skel);
- if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
+ if (!ASSERT_OK(err, "htab_update__load"))
goto out;
skel->bss->pid = getpid();
@@ -33,14 +33,33 @@ static void test_reenter_update(void)
if (!ASSERT_OK(err, "htab_update__attach"))
goto out;
- /* Will trigger the reentrancy of bpf_map_update_elem() */
+ value_size = bpf_map__value_size(skel->maps.htab);
+
+ value = calloc(1, value_size);
+ if (!ASSERT_OK_PTR(value, "calloc value"))
+ goto out;
+ /*
+ * First update: plain insert. This should NOT trigger the re-entrancy
+ * path, because there is no old element to free yet.
+ */
key = 0;
- value = 0;
- err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
- if (!ASSERT_OK(err, "add element"))
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "first update (insert)"))
+ goto out;
+
+ /*
+ * Second update: replace the existing element under the same key and
+ * trigger the reentrancy of bpf_map_update_elem().
+ * check_and_free_fields() calls bpf_obj_free_fields() on the old
+ * value, which is where the fentry program runs and performs a nested
+ * bpf_map_update_elem(), triggering -EDEADLK.
+ */
+ memset(value, 0, value_size);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, value, BPF_ANY);
+ if (!ASSERT_OK(err, "second update (replace)"))
goto out;
- ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
+ ASSERT_EQ(skel->bss->update_err, -EDEADLK, "no reentrancy");
out:
htab_update__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kernel_flag.c b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
new file mode 100644
index 000000000000..97b00c7efe94
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kernel_flag.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Microsoft */
+#include <test_progs.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "test_kernel_flag.skel.h"
+
+void test_kernel_flag(void)
+{
+ struct test_kernel_flag *lsm_skel;
+ struct kfunc_call_test *skel = NULL;
+ struct kfunc_call_test_lskel *lskel = NULL;
+ int ret;
+
+ lsm_skel = test_kernel_flag__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel"))
+ return;
+
+ lsm_skel->bss->monitored_tid = sys_gettid();
+
+ ret = test_kernel_flag__attach(lsm_skel);
+ if (!ASSERT_OK(ret, "test_kernel_flag__attach"))
+ goto close_prog;
+
+ /* Test with skel. This should pass the gatekeeper */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto close_prog;
+
+ /* Test with lskel. This should fail due to blocking kernel-based bpf() invocations */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_ERR_PTR(lskel, "lskel"))
+ goto close_prog;
+
+close_prog:
+ if (skel)
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+
+ lsm_skel->bss->monitored_tid = 0;
+ test_kernel_flag__destroy(lsm_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
index 8e13a3416a21..6e35e13c2022 100644
--- a/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/kmem_cache_iter.c
@@ -57,7 +57,8 @@ static void subtest_kmem_cache_iter_check_slabinfo(struct kmem_cache_iter *skel)
if (!ASSERT_OK(ret, "kmem_cache_lookup"))
break;
- ASSERT_STREQ(r.name, name, "kmem_cache_name");
+ ASSERT_STRNEQ(r.name, name, sizeof(r.name) - 1,
+ "kmem_cache_name");
ASSERT_EQ(r.obj_size, objsize, "kmem_cache_objsize");
seen++;
@@ -104,7 +105,7 @@ void test_kmem_cache_iter(void)
goto destroy;
memset(buf, 0, sizeof(buf));
- while (read(iter_fd, buf, sizeof(buf) > 0)) {
+ while (read(iter_fd, buf, sizeof(buf)) > 0) {
/* Read out all contents */
printf("%s", buf);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 66ab1cae923e..6cfaa978bc9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -7,6 +7,7 @@
#include "kprobe_multi_session.skel.h"
#include "kprobe_multi_session_cookie.skel.h"
#include "kprobe_multi_verifier.skel.h"
+#include "kprobe_write_ctx.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -397,218 +398,29 @@ cleanup:
kprobe_multi_session_cookie__destroy(skel);
}
-static size_t symbol_hash(long key, void *ctx __maybe_unused)
+static void test_unique_match(void)
{
- return str_hash((const char *) key);
-}
-
-static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
-{
- return strcmp((const char *) key1, (const char *) key2) == 0;
-}
-
-static bool is_invalid_entry(char *buf, bool kernel)
-{
- if (kernel && strchr(buf, '['))
- return true;
- if (!kernel && !strchr(buf, '['))
- return true;
- return false;
-}
-
-static bool skip_entry(char *name)
-{
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- return true;
- if (!strcmp(name, "default_idle"))
- return true;
- if (!strncmp(name, "rcu_", 4))
- return true;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- return true;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
- return true;
- return false;
-}
-
-/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
-static int compare_name(const char *name1, const char *name2)
-{
- const char *res1, *res2;
- int len1, len2;
-
- res1 = strstr(name1, ".llvm.");
- res2 = strstr(name2, ".llvm.");
- len1 = res1 ? res1 - name1 : strlen(name1);
- len2 = res2 ? res2 - name2 : strlen(name2);
-
- if (len1 == len2)
- return strncmp(name1, name2, len1);
- if (len1 < len2)
- return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
- return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
-}
-
-static int load_kallsyms_compare(const void *p1, const void *p2)
-{
- return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
-}
-
-static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
-{
- return compare_name(p1, p2->name);
-}
-
-static int get_syms(char ***symsp, size_t *cntp, bool kernel)
-{
- size_t cap = 0, cnt = 0;
- char *name = NULL, *ksym_name, **syms = NULL;
- struct hashmap *map;
- struct ksyms *ksyms;
- struct ksym *ks;
- char buf[256];
- FILE *f;
- int err = 0;
-
- ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
- if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
- return -EINVAL;
-
- /*
- * The available_filter_functions contains many duplicates,
- * but other than that all symbols are usable in kprobe multi
- * interface.
- * Filtering out duplicates by using hashmap__add, which won't
- * add existing entry.
- */
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
-
- if (!f)
- return -EINVAL;
-
- map = hashmap__new(symbol_hash, symbol_equal, NULL);
- if (IS_ERR(map)) {
- err = libbpf_get_error(map);
- goto error;
- }
-
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
- continue;
- if (skip_entry(name))
- continue;
-
- ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
- if (!ks) {
- err = -EINVAL;
- goto error;
- }
-
- ksym_name = ks->name;
- err = hashmap__add(map, ksym_name, 0);
- if (err == -EEXIST) {
- err = 0;
- continue;
- }
- if (err)
- goto error;
-
- err = libbpf_ensure_mem((void **) &syms, &cap,
- sizeof(*syms), cnt + 1);
- if (err)
- goto error;
-
- syms[cnt++] = ksym_name;
- }
-
- *symsp = syms;
- *cntp = cnt;
-
-error:
- free(name);
- fclose(f);
- hashmap__free(map);
- if (err)
- free(syms);
- return err;
-}
-
-static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
-{
- unsigned long *addr, *addrs, *tmp_addrs;
- int err = 0, max_cnt, inc_cnt;
- char *name = NULL;
- size_t cnt = 0;
- char buf[256];
- FILE *f;
-
- if (access("/sys/kernel/tracing/trace", F_OK) == 0)
- f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
- else
- f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
-
- if (!f)
- return -ENOENT;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi *skel = NULL;
+ struct bpf_link *link = NULL;
- /* In my local setup, the number of entries is 50k+ so Let us initially
- * allocate space to hold 64k entries. If 64k is not enough, incrementally
- * increase 1k each time.
- */
- max_cnt = 65536;
- inc_cnt = 1024;
- addrs = malloc(max_cnt * sizeof(long));
- if (addrs == NULL) {
- err = -ENOMEM;
- goto error;
- }
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+ return;
- while (fgets(buf, sizeof(buf), f)) {
- if (is_invalid_entry(buf, kernel))
- continue;
-
- free(name);
- if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
- continue;
- if (skip_entry(name))
- continue;
-
- if (cnt == max_cnt) {
- max_cnt += inc_cnt;
- tmp_addrs = realloc(addrs, max_cnt);
- if (!tmp_addrs) {
- err = -ENOMEM;
- goto error;
- }
- addrs = tmp_addrs;
- }
-
- addrs[cnt++] = (unsigned long)addr;
- }
+ opts.unique_match = true;
+ skel->bss->pid = getpid();
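+
+ /* With unique_match set, a glob matching several symbols must be
+ * rejected, while a glob matching exactly one symbol must attach.
+ */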
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "bpf_fentry_test*", &opts);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ bpf_link__destroy(link);
- *addrsp = addrs;
- *cntp = cnt;
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "bpf_fentry_test8*", &opts);
+ if (ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ bpf_link__destroy(link);
-error:
- free(name);
- fclose(f);
- if (err)
- free(addrs);
- return err;
+ kprobe_multi__destroy(skel);
}
static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
@@ -645,7 +457,7 @@ static void test_kprobe_multi_bench_attach(bool kernel)
char **syms = NULL;
size_t cnt = 0;
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ if (!ASSERT_OK(bpf_get_ksyms(&syms, &cnt, kernel), "bpf_get_ksyms"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -671,13 +483,13 @@ static void test_kprobe_multi_bench_attach_addr(bool kernel)
size_t cnt = 0;
int err;
- err = get_addrs(&addrs, &cnt, kernel);
+ err = bpf_get_addrs(&addrs, &cnt, kernel);
if (err == -ENOENT) {
test__skip();
return;
}
- if (!ASSERT_OK(err, "get_addrs"))
+ if (!ASSERT_OK(err, "bpf_get_addrs"))
return;
skel = kprobe_multi_empty__open_and_load();
@@ -728,6 +540,30 @@ cleanup:
kprobe_multi_override__destroy(skel);
}
+#ifdef __x86_64__
+static void test_attach_write_ctx(void)
+{
+ struct kprobe_write_ctx *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_write_ctx__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_write_ctx__open_and_load"))
+ return;
+
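+ /* attaching a program that writes to the kprobe context is expected
+  * to be rejected at attach time
+  */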
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_multi_write_ctx,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_kprobe_opts"))
+ bpf_link__destroy(link);
+
+ kprobe_write_ctx__destroy(skel);
+}
+#else
+static void test_attach_write_ctx(void)
+{
+ test__skip();
+}
+#endif
+
void serial_test_kprobe_multi_bench_attach(void)
{
if (test__start_subtest("kernel"))
@@ -765,5 +601,9 @@ void test_kprobe_multi_test(void)
test_session_skel_api();
if (test__start_subtest("session_cookie"))
test_session_cookie_skel_api();
+ if (test__start_subtest("unique_match"))
+ test_unique_match();
+ if (test__start_subtest("attach_write_ctx"))
+ test_attach_write_ctx();
RUN_TESTS(kprobe_multi_verifier);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 77d07e0a4a55..14c5a7ef0e87 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -7,6 +7,7 @@
#include "linked_list.skel.h"
#include "linked_list_fail.skel.h"
+#include "linked_list_peek.skel.h"
static char log_buf[1024 * 1024];
@@ -71,7 +72,7 @@ static struct {
{ "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
{ "obj_new_acq", "Unreleased reference id=" },
{ "use_after_drop", "invalid mem access 'scalar'" },
- { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" },
+ { "ptr_walk_scalar", "type=rdonly_untrusted_mem expected=percpu_ptr_" },
{ "direct_read_lock", "direct access to bpf_spin_lock is disallowed" },
{ "direct_write_lock", "direct access to bpf_spin_lock is disallowed" },
{ "direct_read_head", "direct access to bpf_list_head is disallowed" },
@@ -805,3 +806,8 @@ void test_linked_list(void)
test_linked_list_success(LIST_IN_LIST, true);
test_linked_list_success(TEST_ALL, false);
}
+
+void test_linked_list_peek(void)
+{
+ RUN_TESTS(linked_list_peek);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
new file mode 100644
index 000000000000..72aa5376c30e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "testing_helpers.h"
+#include "livepatch_trampoline.skel.h"
+
+static int load_livepatch(void)
+{
+ char path[4096];
+
+ /* CI will set KBUILD_OUTPUT */
+ snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko",
+ getenv("KBUILD_OUTPUT") ? : "../../../..");
+
+ return load_module(path, env_verbosity > VERBOSE_NONE);
+}
+
+static void unload_livepatch(void)
+{
+ /* Disable the livepatch before unloading the module */
+ system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled");
+
+ unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE);
+}
+
+static void read_proc_cmdline(void)
+{
+ char buf[4096];
+ int fd, ret;
+
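+ /* the livepatch sample replaces cmdline_proc_show(), so /proc/cmdline
+  * now reads "this has been live patched"
+  */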
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open /proc/cmdline"))
+ return;
+
+ ret = read(fd, buf, sizeof(buf));
+ if (!ASSERT_GT(ret, 0, "read /proc/cmdline"))
+ goto out;
+
+ ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp");
+
+out:
+ close(fd);
+}
+
+static void __test_livepatch_trampoline(bool fexit_first)
+{
+ struct livepatch_trampoline *skel = NULL;
+ int err;
+
+ skel = livepatch_trampoline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ skel->bss->my_pid = getpid();
+
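+ /* exercise both attach orders so the trampoline on the livepatched
+  * function is verified to work either way
+  */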
+ if (!fexit_first) {
+ /* fentry program is loaded first by default */
+ err = livepatch_trampoline__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+ } else {
+ /* Manually load fexit program first. */
+ skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit"))
+ goto out;
+
+ skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline);
+ if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry"))
+ goto out;
+ }
+
+ read_proc_cmdline();
+
+ ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit");
+ ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit");
+out:
+ livepatch_trampoline__destroy(skel);
+}
+
+void test_livepatch_trampoline(void)
+{
+ int retry_cnt = 0;
+
+retry:
+ if (load_livepatch()) {
+ if (retry_cnt) {
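+ /* already retried once, report the failure */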
+ ASSERT_OK(1, "load_livepatch");
+ goto out;
+ }
+ /*
+ * Something else (previous run of the same test?) loaded
+ * the KLP module. Unload the KLP module and retry.
+ */
+ unload_livepatch();
+ retry_cnt++;
+ goto retry;
+ }
+
+ if (test__start_subtest("fentry_first"))
+ __test_livepatch_trampoline(false);
+
+ if (test__start_subtest("fexit_first"))
+ __test_livepatch_trampoline(true);
+out:
+ unload_livepatch();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
index 169ce689b97c..d6f14a232002 100644
--- a/tools/testing/selftests/bpf/prog_tests/log_buf.c
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -7,6 +7,10 @@
#include "test_log_buf.skel.h"
#include "bpf_util.h"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
static size_t libbpf_log_pos;
static char libbpf_log_buf[1024 * 1024];
static bool libbpf_log_error;
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
index fb1eb8c67361..ccec0fcdabc1 100644
--- a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -5,7 +5,6 @@
#include <time.h>
#include <net/if.h>
-#include <linux/if_tun.h>
#include <linux/icmp.h>
#include "test_progs.h"
@@ -37,34 +36,6 @@ static inline int netns_delete(void)
return system("ip netns del " NETNS ">/dev/null 2>&1");
}
-static int open_tuntap(const char *dev_name, bool need_mac)
-{
- int err = 0;
- struct ifreq ifr;
- int fd = open("/dev/net/tun", O_RDWR);
-
- if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)"))
- return -1;
-
- ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
- strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
- ifr.ifr_name[IFNAMSIZ - 1] = '\0';
-
- err = ioctl(fd, TUNSETIFF, &ifr);
- if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
- close(fd);
- return -1;
- }
-
- err = fcntl(fd, F_SETFL, O_NONBLOCK);
- if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
- close(fd);
- return -1;
- }
-
- return fd;
-}
-
#define ICMP_PAYLOAD_SIZE 100
/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
new file mode 100644
index 000000000000..b6391af5f6f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_ip_encap.c
@@ -0,0 +1,540 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <netinet/in.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define BPF_FILE "test_lwt_ip_encap.bpf.o"
+
+#define NETNS_NAME_SIZE 32
+#define NETNS_BASE "ns-lwt-ip-encap"
+
+#define IP4_ADDR_1 "172.16.1.100"
+#define IP4_ADDR_2 "172.16.2.100"
+#define IP4_ADDR_3 "172.16.3.100"
+#define IP4_ADDR_4 "172.16.4.100"
+#define IP4_ADDR_5 "172.16.5.100"
+#define IP4_ADDR_6 "172.16.6.100"
+#define IP4_ADDR_7 "172.16.7.100"
+#define IP4_ADDR_8 "172.16.8.100"
+#define IP4_ADDR_GRE "172.16.16.100"
+
+#define IP4_ADDR_SRC IP4_ADDR_1
+#define IP4_ADDR_DST IP4_ADDR_4
+
+#define IP6_ADDR_1 "fb01::1"
+#define IP6_ADDR_2 "fb02::1"
+#define IP6_ADDR_3 "fb03::1"
+#define IP6_ADDR_4 "fb04::1"
+#define IP6_ADDR_5 "fb05::1"
+#define IP6_ADDR_6 "fb06::1"
+#define IP6_ADDR_7 "fb07::1"
+#define IP6_ADDR_8 "fb08::1"
+#define IP6_ADDR_GRE "fb10::1"
+
+#define IP6_ADDR_SRC IP6_ADDR_1
+#define IP6_ADDR_DST IP6_ADDR_4
+
+/* Setup/topology:
+ *
+ * NS1 NS2 NS3
+ * veth1 <---> veth2 veth3 <---> veth4 (the top route)
+ * veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
+ *
+ * Each vethN gets IP[4|6]_ADDR_N address.
+ *
+ * IP*_ADDR_SRC = IP*_ADDR_1
+ * IP*_ADDR_DST = IP*_ADDR_4
+ *
+ * All tests ping from IP*_ADDR_SRC to IP*_ADDR_DST.
+ *
+ * By default, routes are configured to allow packets to go
+ * IP*_ADDR_1 <=> IP*_ADDR_2 <=> IP*_ADDR_3 <=> IP*_ADDR_4 (the top route).
+ *
+ * A GRE device is installed in NS3 with IP*_ADDR_GRE, and
+ * NS1/NS2 are configured to route packets to IP*_ADDR_GRE via IP*_ADDR_8
+ * (the bottom route).
+ *
+ * Tests:
+ *
+ * 1. Routes NS2->IP*_ADDR_DST are brought down, so the only way a ping
+ * from IP*_ADDR_SRC to IP*_ADDR_DST can work is via IP*_ADDR_GRE.
+ *
+ * 2a. In an egress test, a bpf LWT_XMIT program is installed on veth1
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth1:egress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ *
+ * 2b. In an ingress test, a bpf LWT_IN program is installed on veth2
+ * that encaps the packets with an IP/GRE header to route to IP*_ADDR_GRE.
+ *
+ * ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
+ * ping replies go DST->SRC directly
+ */
+
+static int create_ns(char *name, size_t name_sz)
+{
+ if (!name)
+ goto fail;
+
+ if (!ASSERT_OK(append_tid(name, name_sz), "append TID"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", name);
+
+ /* rp_filter gets confused by what these tests are doing, so disable it */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.all.rp_filter=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.conf.default.rp_filter=0", name);
+ /* Disable IPv6 DAD because it sometimes takes too long and fails tests */
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.accept_dad=0", name);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.default.accept_dad=0", name);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_top_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth1", ns1, IP4_ADDR_1);
+ SYS(fail, "ip -n %s a add %s/24 dev veth2", ns2, IP4_ADDR_2);
+ SYS(fail, "ip -n %s a add %s/24 dev veth3", ns2, IP4_ADDR_3);
+ SYS(fail, "ip -n %s a add %s/24 dev veth4", ns3, IP4_ADDR_4);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth1", ns1, IP6_ADDR_1);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth2", ns2, IP6_ADDR_2);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth3", ns2, IP6_ADDR_3);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth4", ns3, IP6_ADDR_4);
+
+ SYS(fail, "ip -n %s link set dev veth1 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth2 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth3 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth4 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int set_bottom_addr(const char *ns1, const char *ns2, const char *ns3)
+{
+ SYS(fail, "ip -n %s a add %s/24 dev veth5", ns1, IP4_ADDR_5);
+ SYS(fail, "ip -n %s a add %s/24 dev veth6", ns2, IP4_ADDR_6);
+ SYS(fail, "ip -n %s a add %s/24 dev veth7", ns2, IP4_ADDR_7);
+ SYS(fail, "ip -n %s a add %s/24 dev veth8", ns3, IP4_ADDR_8);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth5", ns1, IP6_ADDR_5);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth6", ns2, IP6_ADDR_6);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth7", ns2, IP6_ADDR_7);
+ SYS(fail, "ip -n %s -6 a add %s/128 dev veth8", ns3, IP6_ADDR_8);
+
+ SYS(fail, "ip -n %s link set dev veth5 up", ns1);
+ SYS(fail, "ip -n %s link set dev veth6 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth7 up", ns2);
+ SYS(fail, "ip -n %s link set dev veth8 up", ns3);
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int configure_vrf(const char *ns1, const char *ns2)
+{
+ if (!ns1 || !ns2)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns1);
+ SYS(fail, "ip -n %s link set red up", ns1);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns1);
+ SYS(fail, "ip -n %s link set veth1 vrf red", ns1);
+ SYS(fail, "ip -n %s link set veth5 vrf red", ns1);
+
+ SYS(fail, "ip -n %s link add red type vrf table 1001", ns2);
+ SYS(fail, "ip -n %s link set red up", ns2);
+ SYS(fail, "ip -n %s route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s -6 route add table 1001 unreachable default metric 8192", ns2);
+ SYS(fail, "ip -n %s link set veth2 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth3 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth6 vrf red", ns2);
+ SYS(fail, "ip -n %s link set veth7 vrf red", ns2);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_ns1(const char *ns1, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns1 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth1 %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip route add default dev veth1 via %s %s", IP4_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth1 %s", IP6_ADDR_2, vrf);
+ SYS(fail, "ip -6 route add default dev veth1 via %s %s", IP6_ADDR_2, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth5 %s", IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_7, IP4_ADDR_6, vrf);
+ SYS(fail, "ip route add %s/32 dev veth5 via %s %s", IP4_ADDR_8, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 %s", IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_7, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth5 via %s %s", IP6_ADDR_8, IP6_ADDR_6, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns2(const char *ns2, const char *vrf)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns2 || !vrf)
+ goto fail;
+
+ nstoken = open_netns(ns2);
+ if (!ASSERT_OK_PTR(nstoken, "open ns2"))
+ goto fail;
+
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv4.ip_forward=1", ns2);
+ SYS(fail, "ip netns exec %s sysctl -wq net.ipv6.conf.all.forwarding=1", ns2);
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth2 %s", IP4_ADDR_1, vrf);
+ SYS(fail, "ip route add %s/32 dev veth3 %s", IP4_ADDR_4, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth2 %s", IP6_ADDR_1, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth3 %s", IP6_ADDR_4, vrf);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth6 %s", IP4_ADDR_5, vrf);
+ SYS(fail, "ip route add %s/32 dev veth7 %s", IP4_ADDR_8, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth6 %s", IP6_ADDR_5, vrf);
+ SYS(fail, "ip -6 route add %s/128 dev veth7 %s", IP6_ADDR_8, vrf);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int configure_ns3(const char *ns3)
+{
+ struct nstoken *nstoken = NULL;
+
+ if (!ns3)
+ goto fail;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ goto fail;
+
+ /* Top route */
+ SYS(fail, "ip route add %s/32 dev veth4", IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_1, IP4_ADDR_3);
+ SYS(fail, "ip route add %s/32 dev veth4 via %s", IP4_ADDR_2, IP4_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4", IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_1, IP6_ADDR_3);
+ SYS(fail, "ip -6 route add %s/128 dev veth4 via %s", IP6_ADDR_2, IP6_ADDR_3);
+ /* Bottom route */
+ SYS(fail, "ip route add %s/32 dev veth8", IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_5, IP4_ADDR_7);
+ SYS(fail, "ip route add %s/32 dev veth8 via %s", IP4_ADDR_6, IP4_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8", IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_5, IP6_ADDR_7);
+ SYS(fail, "ip -6 route add %s/128 dev veth8 via %s", IP6_ADDR_6, IP6_ADDR_7);
+
+ /* Configure IPv4 GRE device */
+ SYS(fail, "ip tunnel add gre_dev mode gre remote %s local %s ttl 255",
+ IP4_ADDR_1, IP4_ADDR_GRE);
+ SYS(fail, "ip link set gre_dev up");
+ SYS(fail, "ip a add %s dev gre_dev", IP4_ADDR_GRE);
+
+ /* Configure IPv6 GRE device */
+ SYS(fail, "ip tunnel add gre6_dev mode ip6gre remote %s local %s ttl 255",
+ IP6_ADDR_1, IP6_ADDR_GRE);
+ SYS(fail, "ip link set gre6_dev up");
+ SYS(fail, "ip a add %s dev gre6_dev", IP6_ADDR_GRE);
+
+ close_netns(nstoken);
+ return 0;
+fail:
+ close_netns(nstoken);
+ return -1;
+}
+
+static int setup_network(char *ns1, char *ns2, char *ns3, const char *vrf)
+{
+ if (!ns1 || !ns2 || !ns3 || !vrf)
+ goto fail;
+
+ SYS(fail, "ip -n %s link add veth1 type veth peer name veth2 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth3 type veth peer name veth4 netns %s", ns2, ns3);
+ SYS(fail, "ip -n %s link add veth5 type veth peer name veth6 netns %s", ns1, ns2);
+ SYS(fail, "ip -n %s link add veth7 type veth peer name veth8 netns %s", ns2, ns3);
+
+ if (vrf[0]) {
+ if (!ASSERT_OK(configure_vrf(ns1, ns2), "configure vrf"))
+ goto fail;
+ }
+ if (!ASSERT_OK(set_top_addr(ns1, ns2, ns3), "set top addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(set_bottom_addr(ns1, ns2, ns3), "set bottom addresses"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns1(ns1, vrf), "configure ns1 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns2(ns2, vrf), "configure ns2 routes"))
+ goto fail;
+
+ if (!ASSERT_OK(configure_ns3(ns3), "configure ns3 routes"))
+ goto fail;
+
+ /* Link bottom route to the GRE tunnels */
+ SYS(fail, "ip -n %s route add %s/32 dev veth5 via %s %s",
+ ns1, IP4_ADDR_GRE, IP4_ADDR_6, vrf);
+ SYS(fail, "ip -n %s route add %s/32 dev veth7 via %s %s",
+ ns2, IP4_ADDR_GRE, IP4_ADDR_8, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth5 via %s %s",
+ ns1, IP6_ADDR_GRE, IP6_ADDR_6, vrf);
+ SYS(fail, "ip -n %s -6 route add %s/128 dev veth7 via %s %s",
+ ns2, IP6_ADDR_GRE, IP6_ADDR_8, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int remove_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route del %s dev veth5 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route del %s dev veth7 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth5 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route del %s/128 dev veth7 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_unreachable_routes_to_gredev(const char *ns1, const char *ns2, const char *vrf)
+{
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns1, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s route add unreachable %s/32 %s", ns2, IP4_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns1, IP6_ADDR_GRE, vrf);
+ SYS(fail, "ip -n %s -6 route add unreachable %s/128 %s", ns2, IP6_ADDR_GRE, vrf);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define GSO_SIZE 5000
+#define GSO_TCP_PORT 9000
+/* This tests the fix from commit ea0371f78799 ("net: fix GSO in bpf_lwt_push_ip_encap") */
+static int test_gso_fix(const char *ns1, const char *ns3, int family)
+{
+ const char *ip_addr = family == AF_INET ? IP4_ADDR_DST : IP6_ADDR_DST;
+ char gso_packet[GSO_SIZE] = {};
+ struct nstoken *nstoken = NULL;
+ int sfd, cfd, afd;
+ ssize_t bytes;
+ int ret = -1;
+
+ if (!ns1 || !ns3)
+ return ret;
+
+ nstoken = open_netns(ns3);
+ if (!ASSERT_OK_PTR(nstoken, "open ns3"))
+ return ret;
+
+ sfd = start_server_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = connect_to_addr_str(family, SOCK_STREAM, ip_addr, GSO_TCP_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "connect to server"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ afd = accept(sfd, NULL, NULL);
+ if (!ASSERT_OK_FD(afd, "accept"))
+ goto close_client;
+
+ /* Send a packet larger than MTU */
+ bytes = send(cfd, gso_packet, GSO_SIZE, 0);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "send packet"))
+ goto close_accept;
+
+ /* Verify we received all expected bytes */
+ bytes = read(afd, gso_packet, GSO_SIZE);
+ if (!ASSERT_EQ(bytes, GSO_SIZE, "receive packet"))
+ goto close_accept;
+
+ ret = 0;
+
+close_accept:
+ close(afd);
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+ return ret;
+}
+
+static int check_ping_ok(const char *ns1)
+{
+ SYS(fail, "ip netns exec %s ping -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP4_ADDR_DST);
+ SYS(fail, "ip netns exec %s ping6 -c 1 -W1 -I veth1 %s > /dev/null", ns1, IP6_ADDR_DST);
+ return 0;
+fail:
+ return -1;
+}
+
+static int check_ping_fails(const char *ns1)
+{
+ int ret;
+
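+ /* SYS_NOFAIL() returns the command's exit status, so a zero return
+  * means the ping unexpectedly succeeded
+  */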
+ ret = SYS_NOFAIL("ip netns exec %s ping -c 1 -W1 -I veth1 %s", ns1, IP4_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ ret = SYS_NOFAIL("ip netns exec %s ping6 -c 1 -W1 -I veth1 %s", ns1, IP6_ADDR_DST);
+ if (!ret)
+ return -1;
+
+ return 0;
+}
+
+#define EGRESS true
+#define INGRESS false
+#define IPV4_ENCAP true
+#define IPV6_ENCAP false
+static void lwt_ip_encap(bool ipv4_encap, bool egress, const char *vrf)
+{
+ char ns1[NETNS_NAME_SIZE] = NETNS_BASE "-1-";
+ char ns2[NETNS_NAME_SIZE] = NETNS_BASE "-2-";
+ char ns3[NETNS_NAME_SIZE] = NETNS_BASE "-3-";
+ char *sec = ipv4_encap ? "encap_gre" : "encap_gre6";
+
+ if (!vrf)
+ return;
+
+ if (!ASSERT_OK(create_ns(ns1, NETNS_NAME_SIZE), "create ns1"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns2, NETNS_NAME_SIZE), "create ns2"))
+ goto out;
+ if (!ASSERT_OK(create_ns(ns3, NETNS_NAME_SIZE), "create ns3"))
+ goto out;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2, ns3, vrf), "setup network"))
+ goto out;
+
+ /* By default, pings work */
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Remove NS2->DST routes, ping fails */
+ SYS(out, "ip -n %s route del %s/32 dev veth3 %s", ns2, IP4_ADDR_DST, vrf);
+ SYS(out, "ip -n %s -6 route del %s/128 dev veth3 %s", ns2, IP6_ADDR_DST, vrf);
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Install replacement routes (LWT/eBPF), pings succeed */
+ if (egress) {
+ SYS(out, "ip -n %s route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf xmit obj %s sec %s dev veth1 %s",
+ ns1, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ } else {
+ SYS(out, "ip -n %s route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP4_ADDR_DST, BPF_FILE, sec, vrf);
+ SYS(out, "ip -n %s -6 route add %s encap bpf in obj %s sec %s dev veth2 %s",
+ ns2, IP6_ADDR_DST, BPF_FILE, sec, vrf);
+ }
+
+ if (!ASSERT_OK(check_ping_ok(ns1), "ping OK"))
+ goto out;
+
+ /* Skip GSO tests with VRF: VRF routing needs properly assigned
+ * source IP/device, which is easy to do with ping but hard with TCP.
+ */
+ if (egress && !vrf[0]) {
+ if (!ASSERT_OK(test_gso_fix(ns1, ns3, AF_INET), "test GSO"))
+ goto out;
+ }
+
+ /* Negative test: remove routes to GRE devices: ping fails */
+ if (!ASSERT_OK(remove_routes_to_gredev(ns1, ns2, vrf), "remove routes to gredev"))
+ goto out;
+ if (!ASSERT_OK(check_ping_fails(ns1), "ping expected fail"))
+ goto out;
+
+ /* Another negative test */
+ if (!ASSERT_OK(add_unreachable_routes_to_gredev(ns1, ns2, vrf),
+ "add unreachable routes"))
+ goto out;
+ ASSERT_OK(check_ping_fails(ns1), "ping expected fail");
+
+out:
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+ SYS_NOFAIL("ip netns del %s", ns3);
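+/* bpf_object__prepare() creates maps (and thus their FDs) without loading
+ * programs, so a prepared object has a valid FD for every map.
+ */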
+}
+
+void test_lwt_ip_encap_vrf_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_vrf_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "vrf red");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "vrf red");
+}
+
+void test_lwt_ip_encap_ipv6(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV6_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV6_ENCAP, INGRESS, "");
+}
+
+void test_lwt_ip_encap_ipv4(void)
+{
+ if (test__start_subtest("egress"))
+ lwt_ip_encap(IPV4_ENCAP, EGRESS, "");
+
+ if (test__start_subtest("ingress"))
+ lwt_ip_encap(IPV4_ENCAP, INGRESS, "");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
new file mode 100644
index 000000000000..3bc730b7c7fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_seg6local.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Connects 6 network namespaces through veths.
+ * Each NS may have different IPv6 global scope addresses:
+ *
+ * NS1 NS2 NS3 NS4 NS5 NS6
+ * lo veth1 <-> veth2 veth3 <-> veth4 veth5 <-> veth6 lo veth7 <-> veth8 veth9 <-> veth10 lo
+ * fb00 ::1 ::12 ::21 ::34 ::43 ::56 ::65 ::78 ::87 ::910 ::109 ::6
+ * fd00 ::4
+ * fc42 ::1
+ *
+ * All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in an
+ * IPv6 header with a Segment Routing Header, with segments:
+ * fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
+ *
+ * 3 fd00::/16 IPv6 addresses are bound to seg6local End.BPF actions:
+ * - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
+ * - fd00::2 : remove the TLV, change the flags, add a tag
+ * - fd00::3 : apply an End.T action to fd00::4, through routing table 117
+ *
+ * fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
+ * Each End.BPF action will validate the operations applied on the SRH by the
+ * previous BPF program in the chain, otherwise the packet is dropped.
+ *
+ * A UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
+ * datagram can be read on NS6 when binding to fb00::6.
+ */
+
+#include "network_helpers.h"
+#include "test_progs.h"
+
+#define NETNS_BASE "lwt-seg6local-"
+#define BPF_FILE "test_lwt_seg6local.bpf.o"
+
+static void cleanup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS_NOFAIL("ip netns del %s%d", NETNS_BASE, ns);
+}
+
+static int setup(void)
+{
+ int ns;
+
+ for (ns = 1; ns < 7; ns++)
+ SYS(fail, "ip netns add %s%d", NETNS_BASE, ns);
+
+ SYS(fail, "ip -n %s6 link set dev lo up", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++) {
+ int local_id = ns * 2 - 1;
+ int peer_id = ns * 2;
+ int next_ns = ns + 1;
+
+ SYS(fail, "ip -n %s%d link add veth%d type veth peer name veth%d netns %s%d",
+ NETNS_BASE, ns, local_id, peer_id, NETNS_BASE, next_ns);
+
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, ns, local_id);
+ SYS(fail, "ip -n %s%d link set dev veth%d up", NETNS_BASE, next_ns, peer_id);
+
+ /* All link scope addresses to veths */
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, ns, local_id, peer_id, local_id);
+ SYS(fail, "ip -n %s%d -6 addr add fb00::%d%d/16 dev veth%d scope link",
+ NETNS_BASE, next_ns, peer_id, local_id, peer_id);
+ }
+
+ SYS(fail, "ip -n %s5 -6 route add fb00::109 table 117 dev veth9 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s1 -6 addr add fb00::1/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s1 -6 route add fb00::6 dev veth1 via fb00::21", NETNS_BASE);
+
+ SYS(fail, "ip -n %s2 -6 route add fb00::6 encap bpf in obj %s sec encap_srh dev veth2",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s2 -6 route add fd00::1 dev veth3 via fb00::43 scope link", NETNS_BASE);
+
+ SYS(fail, "ip -n %s3 -6 route add fc42::1 dev veth5 via fb00::65", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s3 -6 route add fd00::1 encap seg6local action End.BPF endpoint obj %s sec add_egr_x dev veth4",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail,
+ "ip -n %s4 -6 route add fd00::2 encap seg6local action End.BPF endpoint obj %s sec pop_egr dev veth6",
+ NETNS_BASE, BPF_FILE);
+ SYS(fail, "ip -n %s4 -6 addr add fc42::1 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s4 -6 route add fd00::3 dev veth7 via fb00::87", NETNS_BASE);
+
+ SYS(fail, "ip -n %s5 -6 route add fd00::4 table 117 dev veth9 via fb00::109", NETNS_BASE);
+ SYS(fail,
+ "ip -n %s5 -6 route add fd00::3 encap seg6local action End.BPF endpoint obj %s sec inspect_t dev veth8",
+ NETNS_BASE, BPF_FILE);
+
+ SYS(fail, "ip -n %s6 -6 addr add fb00::6/16 dev lo", NETNS_BASE);
+ SYS(fail, "ip -n %s6 -6 addr add fd00::4/16 dev lo", NETNS_BASE);
+
+ for (ns = 1; ns < 6; ns++)
+ SYS(fail, "ip netns exec %s%d sysctl -wq net.ipv6.conf.all.forwarding=1",
+ NETNS_BASE, ns);
+
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.all.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.lo.seg6_enabled=1", NETNS_BASE);
+ SYS(fail, "ip netns exec %s6 sysctl -wq net.ipv6.conf.veth10.seg6_enabled=1", NETNS_BASE);
+
+ return 0;
+fail:
+ return -1;
+}
+
+#define SERVER_PORT 7330
+#define CLIENT_PORT 2121
+void test_lwt_seg6local(void)
+{
+ struct sockaddr_in6 server_addr = {};
+ const char *ns1 = NETNS_BASE "1";
+ const char *ns6 = NETNS_BASE "6";
+ struct nstoken *nstoken = NULL;
+ const char foobar[] = "foobar";
+ ssize_t bytes;
+ int sfd, cfd;
+ char buf[7];
+
+ if (!ASSERT_OK(setup(), "setup"))
+ goto out;
+
+ nstoken = open_netns(ns6);
+ if (!ASSERT_OK_PTR(nstoken, "open ns6"))
+ goto out;
+
+ sfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::6", SERVER_PORT, NULL);
+ if (!ASSERT_OK_FD(sfd, "start server"))
+ goto close_netns;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open ns1"))
+ goto close_server;
+
+ cfd = start_server_str(AF_INET6, SOCK_DGRAM, "fb00::1", CLIENT_PORT, NULL);
+ if (!ASSERT_OK_FD(cfd, "start client"))
+ goto close_server;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Send a UDP datagram through the SRv6 chain */
+ server_addr.sin6_family = AF_INET6;
+ server_addr.sin6_port = htons(SERVER_PORT);
+ if (!ASSERT_EQ(inet_pton(AF_INET6, "fb00::6", &server_addr.sin6_addr), 1,
+ "build target addr"))
+ goto close_client;
+
+ bytes = sendto(cfd, foobar, sizeof(foobar), 0,
+ (struct sockaddr *)&server_addr, sizeof(server_addr));
+ if (!ASSERT_EQ(bytes, sizeof(foobar), "send packet"))
+ goto close_client;
+
+ /* Verify we received all expected bytes */
+ bytes = read(sfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(bytes, sizeof(buf), "receive packet"))
+ goto close_client;
+ ASSERT_STREQ(buf, foobar, "check udp packet");
+
+close_client:
+ close(cfd);
+close_server:
+ close(sfd);
+close_netns:
+ close_netns(nstoken);
+
+out:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_excl.c b/tools/testing/selftests/bpf/prog_tests/map_excl.c
new file mode 100644
index 000000000000..6bdc6d6de0da
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_excl.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "map_excl.skel.h"
+
+static void test_map_excl_allowed(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__set_exclusive_program"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, true);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, false);
+
+ err = map_excl__load(skel);
+ ASSERT_OK(err, "map_excl__load");
+out:
+ map_excl__destroy(skel);
+}
+
+static void test_map_excl_denied(void)
+{
+ struct map_excl *skel = map_excl__open();
+ int err;
+
+ err = bpf_map__set_exclusive_program(skel->maps.excl_map, skel->progs.should_have_access);
+ if (!ASSERT_OK(err, "bpf_map__make_exclusive"))
+ goto out;
+
+ bpf_program__set_autoload(skel->progs.should_have_access, false);
+ bpf_program__set_autoload(skel->progs.should_not_have_access, true);
+
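+ /* should_not_have_access is not the map's exclusive program, so
+  * loading it must be rejected with -EACCES
+  */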
+ err = map_excl__load(skel);
+ ASSERT_EQ(err, -EACCES, "exclusive map access not denied");
+out:
+ map_excl__destroy(skel);
+}
+
+void test_map_excl(void)
+{
+ if (test__start_subtest("map_excl_allowed"))
+ test_map_excl_allowed();
+ if (test__start_subtest("map_excl_denied"))
+ test_map_excl_denied();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..40d4f687bd9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mem_rdonly_untrusted.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "mem_rdonly_untrusted.skel.h"
+
+void test_mem_rdonly_untrusted(void)
+{
+ RUN_TESTS(mem_rdonly_untrusted);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c
index 70d90c43537c..ed8857ae914a 100644
--- a/tools/testing/selftests/bpf/prog_tests/missed.c
+++ b/tools/testing/selftests/bpf/prog_tests/missed.c
@@ -85,6 +85,7 @@ static void test_missed_kprobe_recursion(void)
ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test6)), 1, "test6_recursion_misses");
cleanup:
missed_kprobe_recursion__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 6d391d95f96e..70fa7ae93173 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -90,7 +90,7 @@ void test_module_attach(void)
test_module_attach__detach(skel);
- /* attach fentry/fexit and make sure it get's module reference */
+ /* attach fentry/fexit and make sure it gets module reference */
link = bpf_program__attach(skel->progs.handle_fentry);
if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index f8eb7f9d4fd2..8fade8bdc451 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -6,11 +6,13 @@
#include <netinet/in.h>
#include <test_progs.h>
#include <unistd.h>
+#include <errno.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
#include "mptcp_sock.skel.h"
#include "mptcpify.skel.h"
#include "mptcp_subflow.skel.h"
+#include "mptcp_sockmap.skel.h"
#define NS_TEST "mptcp_ns"
#define ADDR_1 "10.0.1.1"
@@ -436,6 +438,142 @@ close_cgroup:
close(cgroup_fd);
}
+/* Test sockmap on MPTCP server handling non-mp-capable clients. */
+static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, client_fd1 = -1, client_fd2 = -1;
+ int server_fd1 = -1, server_fd2 = -1, sent, recvd;
+ char snd[9] = "123456789";
+ char rcv[10];
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
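+ /* the sockops program is expected to add each accepted socket to
+  * sock_map at index sk_index
+  */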
+ /* create client without MPTCP enabled */
+ client_fd1 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd1 = accept(listen_fd, NULL, 0);
+ skel->bss->sk_index = 1;
+ client_fd2 = connect_to_fd_opts(listen_fd, NULL);
+ if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd"))
+ goto end;
+
+ server_fd2 = accept(listen_fd, NULL, 0);
+ /* test normal redirect behavior: data sent by client_fd1 can be
+ * received by client_fd2
+ */
+ skel->bss->redirect_idx = 1;
+ sent = send(client_fd1, snd, sizeof(snd), 0);
+ if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)"))
+ goto end;
+
+ /* recv into a larger buffer so extra or truncated data is detected */
+ recvd = recv(client_fd2, rcv, sizeof(rcv), 0);
+ if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)"))
+ goto end;
+
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (client_fd2 >= 0)
+ close(client_fd2);
+ if (server_fd1 >= 0)
+ close(server_fd1);
+ if (server_fd2 >= 0)
+ close(server_fd2);
+ close(listen_fd);
+}
+
+/* Test sockmap rejection of MPTCP sockets - both server and client sides. */
+static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel)
+{
+ int listen_fd = -1, server_fd = -1, client_fd1 = -1;
+ int err, zero = 0;
+
+ /* start server with MPTCP enabled */
+ listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server"))
+ return;
+
+ skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd));
+ skel->bss->sk_index = 0;
+ /* create client with MPTCP enabled */
+ client_fd1 = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1"))
+ goto end;
+
+ /* bpf_sock_map_update() called from sockops should reject MPTCP sk */
+ if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject"))
+ goto end;
+
+ server_fd = accept(listen_fd, NULL, 0);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &server_fd, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed"))
+ goto end;
+
+ /* MPTCP client should also be disallowed */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map),
+ &zero, &client_fd1, BPF_NOEXIST);
+ if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed"))
+ goto end;
+end:
+ if (client_fd1 >= 0)
+ close(client_fd1);
+ if (server_fd >= 0)
+ close(server_fd);
+ close(listen_fd);
+}
+
+static void test_mptcp_sockmap(void)
+{
+ struct mptcp_sockmap *skel;
+ struct netns_obj *netns;
+ int cgroup_fd, err;
+
+ cgroup_fd = test__join_cgroup("/mptcp_sockmap");
+ if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap"))
+ return;
+
+ skel = mptcp_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap"))
+ goto close_cgroup;
+
+ skel->links.mptcp_sockmap_inject =
+ bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap"))
+ goto skel_destroy;
+
+ err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect),
+ bpf_map__fd(skel->maps.sock_map),
+ BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto skel_destroy;
+
+ netns = netns_new(NS_TEST, true);
+ if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap"))
+ goto skel_destroy;
+
+ if (endpoint_init("subflow") < 0)
+ goto close_netns;
+
+ test_sockmap_with_mptcp_fallback(skel);
+ test_sockmap_reject_mptcp(skel);
+
+close_netns:
+ netns_free(netns);
+skel_destroy:
+ mptcp_sockmap__destroy(skel);
+close_cgroup:
+ close(cgroup_fd);
+}
+
void test_mptcp(void)
{
if (test__start_subtest("base"))
@@ -444,4 +582,6 @@ void test_mptcp(void)
test_mptcpify();
if (test__start_subtest("subflow"))
test_subflow();
+ if (test__start_subtest("sockmap"))
+ test_mptcp_sockmap();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/net_timestamping.c b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
new file mode 100644
index 000000000000..dbfd87499b6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/net_timestamping.c
@@ -0,0 +1,239 @@
+#include <linux/net_tstamp.h>
+#include <sys/time.h>
+#include <linux/errqueue.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "net_timestamping.skel.h"
+
+#define CG_NAME "/net-timestamping-test"
+#define NSEC_PER_SEC 1000000000LL
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct net_timestamping *skel;
+static const int cfg_payload_len = 30;
+static struct timespec usr_ts;
+static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds */
+int SK_TS_SCHED;
+int SK_TS_TXSW;
+int SK_TS_ACK;
+
+static int64_t timespec_to_ns64(struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+static void validate_key(int tskey, int tstype)
+{
+ static int expected_tskey = -1;
+
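+ /* with SOF_TIMESTAMPING_OPT_ID on TCP the key is the byte offset of
+  * the last byte of the write, so the first (SCHED) report carries
+  * cfg_payload_len - 1 and the SND/ACK reports reuse the same key
+  */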
+ if (tstype == SCM_TSTAMP_SCHED)
+ expected_tskey = cfg_payload_len - 1;
+
+ ASSERT_EQ(expected_tskey, tskey, "tskey mismatch");
+
+ expected_tskey = tskey;
+}
+
+static void validate_timestamp(struct timespec *cur, struct timespec *prev)
+{
+ int64_t cur_ns, prev_ns;
+
+ cur_ns = timespec_to_ns64(cur);
+ prev_ns = timespec_to_ns64(prev);
+
+ ASSERT_LT(cur_ns - prev_ns, delay_tolerance_nsec, "latency");
+}
+
+static void test_socket_timestamp(struct scm_timestamping *tss, int tstype,
+ int tskey)
+{
+ static struct timespec prev_ts;
+
+ validate_key(tskey, tstype);
+
+ switch (tstype) {
+ case SCM_TSTAMP_SCHED:
+ validate_timestamp(&tss->ts[0], &usr_ts);
+ SK_TS_SCHED += 1;
+ break;
+ case SCM_TSTAMP_SND:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_TXSW += 1;
+ break;
+ case SCM_TSTAMP_ACK:
+ validate_timestamp(&tss->ts[0], &prev_ts);
+ SK_TS_ACK += 1;
+ break;
+ }
+
+ prev_ts = tss->ts[0];
+}
+
+static void test_recv_errmsg_cmsg(struct msghdr *msg)
+{
+ struct sock_extended_err *serr = NULL;
+ struct scm_timestamping *tss = NULL;
+ struct cmsghdr *cm;
+
+ for (cm = CMSG_FIRSTHDR(msg);
+ cm && cm->cmsg_len;
+ cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level == SOL_SOCKET &&
+ cm->cmsg_type == SCM_TIMESTAMPING) {
+ tss = (void *)CMSG_DATA(cm);
+ } else if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_RECVERR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_RECVERR) ||
+ (cm->cmsg_level == SOL_PACKET &&
+ cm->cmsg_type == PACKET_TX_TIMESTAMP)) {
+ serr = (void *)CMSG_DATA(cm);
+ ASSERT_EQ(serr->ee_origin, SO_EE_ORIGIN_TIMESTAMPING,
+ "cmsg type");
+ }
+
+ if (serr && tss)
+ test_socket_timestamp(tss, serr->ee_info,
+ serr->ee_data);
+ }
+}
+
+static bool socket_recv_errmsg(int fd)
+{
+ static char ctrl[1024 /* overprovision */];
+ char data[cfg_payload_len];
+ static struct msghdr msg;
+ struct iovec entry;
+ int n = 0;
+
+ memset(&msg, 0, sizeof(msg));
+ memset(&entry, 0, sizeof(entry));
+ memset(ctrl, 0, sizeof(ctrl));
+
+ entry.iov_base = data;
+ entry.iov_len = cfg_payload_len;
+ msg.msg_iov = &entry;
+ msg.msg_iovlen = 1;
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
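+ /* drain one message from the error queue; MSG_ERRQUEUE never blocks
+  * and EAGAIN means the queue is empty
+  */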
+ n = recvmsg(fd, &msg, MSG_ERRQUEUE);
+ if (n == -1)
+ ASSERT_EQ(errno, EAGAIN, "recvmsg MSG_ERRQUEUE");
+
+ if (n >= 0)
+ test_recv_errmsg_cmsg(&msg);
+
+ return n == -1;
+}
+
+static void test_socket_timestamping(int fd)
+{
+ while (!socket_recv_errmsg(fd));
+
+ ASSERT_EQ(SK_TS_SCHED, 1, "SCM_TSTAMP_SCHED");
+ ASSERT_EQ(SK_TS_TXSW, 1, "SCM_TSTAMP_SND");
+ ASSERT_EQ(SK_TS_ACK, 1, "SCM_TSTAMP_ACK");
+
+ SK_TS_SCHED = 0;
+ SK_TS_TXSW = 0;
+ SK_TS_ACK = 0;
+}
+
+static void test_tcp(int family, bool enable_socket_timestamping)
+{
+ struct net_timestamping__bss *bss;
+ char buf[cfg_payload_len];
+ int sfd = -1, cfd = -1;
+ unsigned int sock_opt;
+ struct netns_obj *ns;
+ int cg_fd;
+ int ret;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
+ return;
+
+ ns = netns_new("net_timestamping_ns", true);
+ if (!ASSERT_OK_PTR(ns, "create ns"))
+ goto out;
+
+ skel = net_timestamping__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
+ goto out;
+
+ if (!ASSERT_OK(net_timestamping__attach(skel), "attach skel"))
+ goto out;
+
+ skel->links.skops_sockopt =
+ bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+ goto out;
+
+ bss = skel->bss;
+ memset(bss, 0, sizeof(*bss));
+
+ skel->bss->monitored_pid = getpid();
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_OK_FD(sfd, "start_server"))
+ goto out;
+
+ cfd = connect_to_fd(sfd, 0);
+ if (!ASSERT_OK_FD(cfd, "connect_to_fd_server"))
+ goto out;
+
+ if (enable_socket_timestamping) {
+ sock_opt = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID |
+ SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK;
+ ret = setsockopt(cfd, SOL_SOCKET, SO_TIMESTAMPING,
+ (char *) &sock_opt, sizeof(sock_opt));
+ if (!ASSERT_OK(ret, "setsockopt SO_TIMESTAMPING"))
+ goto out;
+
+ ret = clock_gettime(CLOCK_REALTIME, &usr_ts);
+ if (!ASSERT_OK(ret, "get user time"))
+ goto out;
+ }
+
+ ret = write(cfd, buf, sizeof(buf));
+ if (!ASSERT_EQ(ret, sizeof(buf), "send to server"))
+ goto out;
+
+ if (enable_socket_timestamping)
+ test_socket_timestamping(cfd);
+
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_snd, 2, "nr_snd");
+ ASSERT_EQ(bss->nr_sched, 1, "nr_sched");
+ ASSERT_EQ(bss->nr_txsw, 1, "nr_txsw");
+ ASSERT_EQ(bss->nr_ack, 1, "nr_ack");
+
+out:
+ if (sfd >= 0)
+ close(sfd);
+ if (cfd >= 0)
+ close(cfd);
+ net_timestamping__destroy(skel);
+ netns_free(ns);
+ close(cg_fd);
+}
+
+void test_net_timestamping(void)
+{
+ if (test__start_subtest("INET4: bpf timestamping"))
+ test_tcp(AF_INET, false);
+ if (test__start_subtest("INET4: bpf and socket timestamping"))
+ test_tcp(AF_INET, true);
+ if (test__start_subtest("INET6: bpf timestamping"))
+ test_tcp(AF_INET6, false);
+ if (test__start_subtest("INET6: bpf and socket timestamping"))
+ test_tcp(AF_INET6, true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
index ac3c3c097c0e..e00cd34586dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -33,20 +33,25 @@ void test_netns_cookie(void)
skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
skel->progs.get_netns_cookie_sockops, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach_sockops"))
goto done;
verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
map = bpf_map__fd(skel->maps.sock_map);
err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_sk_msg"))
goto done;
tc_fd = bpf_program__fd(skel->progs.get_netns_cookie_tcx);
err = bpf_prog_attach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &opta);
- if (!ASSERT_OK(err, "prog_attach"))
+ if (!ASSERT_OK(err, "prog_attach_tcx"))
goto done;
+ skel->links.get_netns_cookie_cgroup_skb = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_cgroup_skb, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_cgroup_skb, "prog_attach_cgroup_skb"))
+ goto cleanup_tc;
+
server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
goto cleanup_tc;
@@ -69,16 +74,18 @@ void test_netns_cookie(void)
if (!ASSERT_OK(err, "getsockopt"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sockops");
err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
&client_fd, &val);
if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
goto cleanup_tc;
- ASSERT_EQ(val, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value");
- ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value");
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value_sk_msg");
+ ASSERT_EQ(skel->bss->tcx_init_netns_cookie, cookie_expected_value, "cookie_value_init_tcx");
+ ASSERT_EQ(skel->bss->tcx_netns_cookie, cookie_expected_value, "cookie_value_tcx");
+ ASSERT_EQ(skel->bss->cgroup_skb_init_netns_cookie, cookie_expected_value, "cookie_value_init_cgroup_skb");
+ ASSERT_EQ(skel->bss->cgroup_skb_netns_cookie, cookie_expected_value, "cookie_value_cgroup_skb");
cleanup_tc:
err = bpf_prog_detach_opts(tc_fd, loopback, BPF_TCX_INGRESS, &optd);
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 761ce24bce38..99c953f2be21 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -200,41 +200,28 @@ static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
return;
}
-static void test_in_netns(int (*fn)(void *), void *arg)
-{
- struct nstoken *nstoken = NULL;
-
- SYS(cleanup, "ip netns add ns_current_pid_tgid");
- SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up");
-
- nstoken = open_netns("ns_current_pid_tgid");
- if (!ASSERT_OK_PTR(nstoken, "open_netns"))
- goto cleanup;
-
- test_ns_current_pid_tgid_new_ns(fn, arg);
-
-cleanup:
- if (nstoken)
- close_netns(nstoken);
- SYS_NOFAIL("ip netns del ns_current_pid_tgid");
-}
-
/* TODO: use a different tracepoint */
-void serial_test_ns_current_pid_tgid(void)
+void serial_test_current_pid_tgid(void)
{
if (test__start_subtest("root_ns_tp"))
test_current_pid_tgid_tp(NULL);
if (test__start_subtest("new_ns_tp"))
test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
- if (test__start_subtest("new_ns_cgrp")) {
- int cgroup_fd = -1;
-
- cgroup_fd = test__join_cgroup("/sock_addr");
- if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) {
- test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd);
- close(cgroup_fd);
- }
+}
+
+void test_ns_current_pid_tgid_cgrp(void)
+{
+ int cgroup_fd = test__join_cgroup("/sock_addr");
+
+ if (ASSERT_OK_FD(cgroup_fd, "join_cgroup")) {
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
}
- if (test__start_subtest("new_ns_sk_msg"))
- test_in_netns(test_current_pid_tgid_sk_msg, NULL);
}
+
+void test_ns_current_pid_tgid_sk_msg(void)
+{
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_sk_msg, NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index bc24f83339d6..0a7ef770c487 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -15,6 +15,10 @@ static void check_good_sample(struct test_perf_branches *skel)
int pbe_size = sizeof(struct perf_branch_entry);
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -45,6 +49,10 @@ static void check_bad_sample(struct test_perf_branches *skel)
int written_stack = skel->bss->written_stack_out;
int duration = 0;
+ if (CHECK(!skel->bss->run_cnt, "invalid run_cnt",
+ "checked sample validity before prog run"))
+ return;
+
if (CHECK(!skel->bss->valid, "output not valid",
"no valid sample from prog"))
return;
@@ -83,8 +91,12 @@ static void test_perf_branches_common(int perf_fd,
err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
if (CHECK(err, "set_affinity", "cpu #0, err %d\n", err))
goto out_destroy;
- /* spin the loop for a while (random high number) */
- for (i = 0; i < 1000000; ++i)
+
+ /* Spin the loop for a while by using a high iteration count, and by
+ * checking whether the specific run count marker has been explicitly
+ * incremented at least once by the backing perf_event BPF program.
+ */
+ for (i = 0; i < 100000000 && !*(volatile int *)&skel->bss->run_cnt; ++i)
++j;
test_perf_branches__detach(skel);
@@ -116,11 +128,11 @@ static void test_perf_branches_hw(void)
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
/*
- * Some setups don't support branch records (virtual machines, !x86),
- * so skip test in this case.
+ * Some setups don't support LBR (virtual machines, !x86, AMD Milan/Zen 3,
+ * which only supports BRS), so skip the test in this case.
*/
if (pfd < 0) {
- if (errno == ENOENT || errno == EOPNOTSUPP) {
+ if (errno == ENOENT || errno == EOPNOTSUPP || errno == EINVAL) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
test__skip();
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
new file mode 100644
index 000000000000..9ae49b587f3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <test_progs.h>
+
+
+#include "test_pinning_devmap.skel.h"
+
+void test_pinning_devmap_reuse(void)
+{
+ const char *pinpath1 = "/sys/fs/bpf/pinmap1";
+ const char *pinpath2 = "/sys/fs/bpf/pinmap2";
+ struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ /* load the object a first time */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load1"))
+ goto out;
+
+ /* load the object a second time, re-using the pinned map */
+ skel2 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load2"))
+ goto out;
+
+ /* we can close the reference safely without
+ * the map's refcount falling to 0
+ */
+ test_pinning_devmap__destroy(skel1);
+ skel1 = NULL;
+
+ /* now, swap the pins */
+ err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "swap pins"))
+ goto out;
+
+ /* load the object again; after the swap the pinned map found at each
+ * path no longer matches the expected map, so the re-use should fail
+ */
+ skel1 = test_pinning_devmap__open_and_load();
+ if (!ASSERT_ERR_PTR(skel1, "skel_load3"))
+ goto out;
+
+out:
+ unlink(pinpath1);
+ unlink(pinpath2);
+ test_pinning_devmap__destroy(skel1);
+ test_pinning_devmap__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_htab.c b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
new file mode 100644
index 000000000000..16bd74be3dbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pinning_htab.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_pinning_htab.skel.h"
+
+static void unpin_map(const char *map_name, const char *pin_path)
+{
+ struct test_pinning_htab *skel;
+ struct bpf_map *map;
+ int err;
+
+ skel = test_pinning_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "bpf_object__find_map_by_name"))
+ goto out;
+
+ err = bpf_map__pin(map, pin_path);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = bpf_map__unpin(map, pin_path);
+ ASSERT_OK(err, "bpf_map__unpin");
+out:
+ test_pinning_htab__destroy(skel);
+}
+
+void test_pinning_htab(void)
+{
+ if (test__start_subtest("timer_prealloc"))
+ unpin_map("timer_prealloc", "/sys/fs/bpf/timer_prealloc");
+ if (test__start_subtest("timer_no_prealloc"))
+ unpin_map("timer_no_prealloc", "/sys/fs/bpf/timer_no_prealloc");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prepare.c b/tools/testing/selftests/bpf/prog_tests/prepare.c
new file mode 100644
index 000000000000..fb5cdad97116
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prepare.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "prepare.skel.h"
+
+static bool check_prepared(struct bpf_object *obj)
+{
+ bool is_prepared = true;
+ const struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ if (bpf_map__fd(map) < 0)
+ is_prepared = false;
+ }
+
+ return is_prepared;
+}
+
+static void test_prepare_no_load(void)
+{
+ struct prepare *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+static void test_prepare_load(void)
+{
+ struct prepare *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ );
+
+ skel = prepare__open();
+ if (!ASSERT_OK_PTR(skel, "prepare__open"))
+ return;
+
+ if (!ASSERT_FALSE(check_prepared(skel->obj), "not check_prepared"))
+ goto cleanup;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "bpf_object__prepare"))
+ goto cleanup;
+
+ err = prepare__load(skel);
+ if (!ASSERT_OK(err, "prepare__load"))
+ goto cleanup;
+
+ if (!ASSERT_TRUE(check_prepared(skel->obj), "check_prepared"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.program);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ goto cleanup;
+
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ prepare__destroy(skel);
+}
+
+void test_prepare(void)
+{
+ if (test__start_subtest("prepare_load"))
+ test_prepare_load();
+ if (test__start_subtest("prepare_no_load"))
+ test_prepare_no_load();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
index 509883e6823a..5d3c00a08a88 100644
--- a/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
+++ b/tools/testing/selftests/bpf/prog_tests/pro_epilogue.c
@@ -6,6 +6,7 @@
#include "epilogue_tailcall.skel.h"
#include "pro_epilogue_goto_start.skel.h"
#include "epilogue_exit.skel.h"
+#include "pro_epilogue_with_kfunc.skel.h"
struct st_ops_args {
__u64 a;
@@ -55,6 +56,7 @@ void test_pro_epilogue(void)
RUN_TESTS(pro_epilogue);
RUN_TESTS(pro_epilogue_goto_start);
RUN_TESTS(epilogue_exit);
+ RUN_TESTS(pro_epilogue_with_kfunc);
if (test__start_subtest("tailcall"))
test_tailcall();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
index 14f2796076e0..7607cfc2408c 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -54,3 +54,128 @@ void test_prog_tests_framework(void)
return;
clear_test_state(state);
}
+
+static void dummy_emit(const char *buf, bool force) {}
+
+void test_prog_tests_framework_expected_msgs(void)
+{
+ struct expected_msgs msgs;
+ int i, j, error_cnt;
+ const struct {
+ const char *name;
+ const char *log;
+ const char *expected;
+ struct expect_msg *pats;
+ } cases[] = {
+ {
+ .name = "simple-ok",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "simple-fail",
+ .log = "aaabbbddd",
+ .expected = "MATCHED SUBSTR: 'aaa'\n"
+ "EXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-mid",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ { .substr = "bar", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-tail",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "foo", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-ok-head",
+ .log = "aaabbbccc",
+ .pats = (struct expect_msg[]) {
+ { .substr = "foo", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-head",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'aaa'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa", .negative = true },
+ { .substr = "bbb" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-tail",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'ccc'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "bbb" },
+ { .substr = "ccc", .negative = true },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-1",
+ .log = "aaabbbccc",
+ .expected = "UNEXPECTED SUBSTR: 'bbb'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ },
+ {
+ .name = "negative-fail-mid-2",
+ .log = "aaabbb222ccc",
+ .expected = "UNEXPECTED SUBSTR: '222'\n",
+ .pats = (struct expect_msg[]) {
+ { .substr = "aaa" },
+ { .substr = "222", .negative = true },
+ { .substr = "bbb", .negative = true },
+ { .substr = "ccc" },
+ {}
+ }
+ }
+ };
+
+ for (i = 0; i < ARRAY_SIZE(cases); i++) {
+ if (test__start_subtest(cases[i].name)) {
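+			/* validate_msgs() on a failing case bumps error_cnt; save and
+			 * restore it so intentional mismatches don't fail this subtest
+			 */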
+ error_cnt = env.subtest_state->error_cnt;
+ msgs.patterns = cases[i].pats;
+ msgs.cnt = 0;
+ for (j = 0; cases[i].pats[j].substr; j++)
+ msgs.cnt++;
+ validate_msgs(cases[i].log, &msgs, dummy_emit);
+ fflush(stderr);
+ env.subtest_state->error_cnt = error_cnt;
+ if (cases[i].expected)
+ ASSERT_HAS_SUBSTR(env.subtest_state->log_buf, cases[i].expected, "expected output");
+ else
+ ASSERT_STREQ(env.subtest_state->log_buf, "", "expected no output");
+ test__end_subtest();
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
index 6fa19449297e..43676a9922dc 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_null.c
@@ -3,11 +3,14 @@
#include <test_progs.h>
#include "raw_tp_null.skel.h"
+#include "raw_tp_null_fail.skel.h"
void test_raw_tp_null(void)
{
struct raw_tp_null *skel;
+ RUN_TESTS(raw_tp_null_fail);
+
skel = raw_tp_null__open_and_load();
if (!ASSERT_OK_PTR(skel, "raw_tp_null__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index 9818f06c97c5..d8f3d7a45fe9 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -8,6 +8,7 @@
#include "rbtree_fail.skel.h"
#include "rbtree_btf_fail__wrong_node_type.skel.h"
#include "rbtree_btf_fail__add_wrong_type.skel.h"
+#include "rbtree_search.skel.h"
static void test_rbtree_add_nodes(void)
{
@@ -187,3 +188,8 @@ void test_rbtree_fail(void)
{
RUN_TESTS(rbtree_fail);
}
+
+void test_rbtree_search(void)
+{
+ RUN_TESTS(rbtree_search);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
index ebe0c12b5536..246eb259c08a 100644
--- a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -28,6 +28,7 @@ static void test_success(void)
bpf_program__set_autoload(skel->progs.two_regions, true);
bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.nested_rcu_region, true);
bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
@@ -78,9 +79,13 @@ static const char * const inproper_region_tests[] = {
"non_sleepable_rcu_mismatch",
"inproper_sleepable_helper",
"inproper_sleepable_kfunc",
- "nested_rcu_region",
+ "nested_rcu_region_unbalanced_1",
+ "nested_rcu_region_unbalanced_2",
"rcu_read_lock_global_subprog_lock",
"rcu_read_lock_global_subprog_unlock",
+ "rcu_read_lock_sleepable_helper_global_subprog",
+ "rcu_read_lock_sleepable_kfunc_global_subprog",
+ "rcu_read_lock_sleepable_global_subprog_indirect",
};
static void test_inproper_region(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
index c7b9ba8b1d06..a8d1eaa67020 100644
--- a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -24,6 +24,7 @@ struct read_ret_desc {
{ .name = "copy_from_user", .ret = -EFAULT },
{ .name = "copy_from_user_task", .ret = -EFAULT },
{ .name = "copy_from_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user_task_str", .ret = -EFAULT },
};
void test_read_vsyscall(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
index 8100509e561b..0ffa01d54ce2 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
@@ -149,3 +149,70 @@ close_prog:
fentry_recursive_target__destroy(target_skel);
fentry_recursive__destroy(tracing_skel);
}
+
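+/* Keep test-running the target program until the main thread resets the fd to -1. */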
+static void *fentry_target_test_run(void *arg)
+{
+ for (;;) {
+ int prog_fd = __atomic_load_n((int *)arg, __ATOMIC_SEQ_CST);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ if (prog_fd == -1)
+ break;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "fentry_target test_run"))
+ break;
+ }
+
+ return NULL;
+}
+
+void test_fentry_attach_stress(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, i, tgt_prog_fd;
+ pthread_t thread;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel,
+ "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = pthread_create(&thread, NULL,
+ fentry_target_test_run, &tgt_prog_fd);
+	if (!ASSERT_OK(err, "pthread_create"))
+ goto close_prog;
+
+ for (i = 0; i < 1000; i++) {
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto stop_thread;
+
+ prog = tracing_skel->progs.recursive_attach;
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd,
+ "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto stop_thread;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto stop_thread;
+
+ err = fentry_recursive__attach(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto stop_thread;
+
+ fentry_recursive__destroy(tracing_skel);
+ tracing_skel = NULL;
+ }
+
+stop_thread:
+ __atomic_store_n(&tgt_prog_fd, -1, __ATOMIC_SEQ_CST);
+ err = pthread_join(thread, NULL);
+ ASSERT_OK(err, "pthread_join");
+close_prog:
+ fentry_recursive__destroy(tracing_skel);
+ fentry_recursive_target__destroy(target_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
index d6bd5e16e637..d2c0542716a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void)
ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
refcounted_kptr__destroy(skel);
}
+
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+ struct refcounted_kptr *skel;
+ int cpu_nr, fd, err, key = 0;
+ struct bpf_map *map;
+ size_t values_sz;
+ u64 *values;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ cpu_nr = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+ return;
+
+ values = calloc(cpu_nr, sizeof(u64));
+ if (!ASSERT_OK_PTR(values, "calloc values"))
+ return;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+ free(values);
+ return;
+ }
+
+ values_sz = cpu_nr * sizeof(u64);
+ memset(values, 0, values_sz);
+
+ map = skel->maps.percpu_hash;
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+ goto out;
+
+ err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+ if (!ASSERT_OK(err, "bpf_map__update_elem"))
+ goto out;
+
+ fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+ refcounted_kptr__destroy(skel);
+ free(values);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
index 39d42271cc46..d93a0c7b1786 100644
--- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -465,6 +465,20 @@ static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t,
return range_improve(x_t, x, x_swap);
}
+ if (!t_is_32(x_t) && !t_is_32(y_t) && x_t != y_t) {
+ if (x_t == S64 && x.a > x.b) {
+ if (x.b < y.a && x.a <= y.b)
+ return range(x_t, x.a, y.b);
+ if (x.a > y.b && x.b >= y.a)
+ return range(x_t, y.a, x.b);
+ } else if (x_t == U64 && y.a > y.b) {
+ if (y.b < x.a && y.a <= x.b)
+ return range(x_t, y.a, x.b);
+ if (y.a > x.b && y.b >= x.a)
+ return range(x_t, x.a, y.b);
+ }
+ }
+
/* otherwise, plain range cast and intersection works */
return range_improve(x_t, x, y_cast);
}
@@ -609,7 +623,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a, x.b);
*newy = range(t, y.a + 1, y.b);
} else if (x.a == x.b && x.b == y.b) {
- /* X is a constant matching rigth side of Y */
+ /* X is a constant matching right side of Y */
*newx = range(t, x.a, x.b);
*newy = range(t, y.a, y.b - 1);
} else if (y.a == y.b && x.a == y.a) {
@@ -617,7 +631,7 @@ static void range_cond(enum num_t t, struct range x, struct range y,
*newx = range(t, x.a + 1, x.b);
*newy = range(t, y.a, y.b);
} else if (y.a == y.b && x.b == y.b) {
- /* Y is a constant matching rigth side of X */
+ /* Y is a constant matching right side of X */
*newx = range(t, x.a, x.b - 1);
*newy = range(t, y.a, y.b);
} else {
diff --git a/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
new file mode 100644
index 000000000000..f0a8c828f8f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/res_spin_lock.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/sysinfo.h>
+
+#include "res_spin_lock.skel.h"
+#include "res_spin_lock_fail.skel.h"
+
+void test_res_spin_lock_failure(void)
+{
+ RUN_TESTS(res_spin_lock_fail);
+}
+
+static volatile int skip;
+
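+/* Run the passed program in a loop until the main thread sets skip or a run fails. */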
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ while (!READ_ONCE(skip)) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (err || topts.retval) {
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ break;
+ }
+ }
+ pthread_exit(arg);
+}
+
+void test_res_spin_lock_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct res_spin_lock *skel;
+ pthread_t thread_id[16];
+ int prog_fd, i, err;
+ void *ret;
+
+ if (get_nprocs() < 2) {
+ test__skip();
+ return;
+ }
+
+ skel = res_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "res_spin_lock__open_and_load"))
+ return;
+ /* AA deadlock */
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_held_lock_max);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "error");
+ ASSERT_OK(topts.retval, "retval");
+
+ /* Multi-threaded ABBA deadlock. */
+
+ prog_fd = bpf_program__fd(skel->progs.res_spin_lock_test_AB);
+ for (i = 0; i < 16; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ topts.retval = 0;
+ topts.repeat = 1000;
+ int fd = bpf_program__fd(skel->progs.res_spin_lock_test_BA);
+ while (!topts.retval && !err && !READ_ONCE(skel->bss->err)) {
+ err = bpf_prog_test_run_opts(fd, &topts);
+ }
+
+ WRITE_ONCE(skip, true);
+
+ for (i = 0; i < 16; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+
+ ASSERT_EQ(READ_ONCE(skel->bss->err), -EDEADLK, "timeout err");
+ ASSERT_OK(err, "err");
+ ASSERT_EQ(topts.retval, -EDEADLK, "timeout");
+end:
+ res_spin_lock__destroy(skel);
+ return;
+}
+
+void serial_test_res_spin_lock_stress(void)
+{
+ if (libbpf_num_possible_cpus() < 3) {
+ test__skip();
+ return;
+ }
+
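+	/* The module's default test_mode exercises the AA deadlock case. */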
+ ASSERT_OK(load_module("bpf_test_rqspinlock.ko", false), "load module AA");
+ sleep(5);
+ unload_module("bpf_test_rqspinlock", false);
+ /*
+ * Insert bpf_test_rqspinlock.ko manually with test_mode=[1|2] to test
+ * other cases (ABBA, ABBCCA).
+ */
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index da430df45aa4..64520684d2cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -17,6 +17,7 @@
#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
#include "test_ringbuf_write.lskel.h"
+#include "test_ringbuf_overwrite.lskel.h"
#define EDONE 7777
@@ -97,7 +98,7 @@ static void ringbuf_write_subtest(void)
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
- skel->maps.ringbuf.max_entries = 0x4000;
+ skel->maps.ringbuf.max_entries = 0x40000;
err = test_ringbuf_write_lskel__load(skel);
if (!ASSERT_OK(err, "skel_load"))
@@ -108,7 +109,7 @@ static void ringbuf_write_subtest(void)
mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
goto cleanup;
- *mmap_ptr = 0x3000;
+ *mmap_ptr = 0x30000;
ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
skel->bss->pid = getpid();
@@ -497,6 +498,68 @@ cleanup:
test_ringbuf_map_key_lskel__destroy(skel_map_key);
}
+static void ringbuf_overwrite_mode_subtest(void)
+{
+ unsigned long size, len1, len2, len3, len4, len5;
+ unsigned long expect_avail_data, expect_prod_pos, expect_over_pos;
+ struct test_ringbuf_overwrite_lskel *skel;
+ int page_size = getpagesize();
+ int err;
+
+ skel = test_ringbuf_overwrite_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ size = page_size;
+ len1 = page_size / 2;
+ len2 = page_size / 4;
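+	/* len3 (plus its header) fills exactly what remains of the ring after the len1 and len2 records */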
+ len3 = size - len1 - len2 - BPF_RINGBUF_HDR_SZ * 3;
+ len4 = len3 - 8;
+ len5 = len3; /* retry with len3 */
+
+ skel->maps.ringbuf.max_entries = size;
+ skel->rodata->LEN1 = len1;
+ skel->rodata->LEN2 = len2;
+ skel->rodata->LEN3 = len3;
+ skel->rodata->LEN4 = len4;
+ skel->rodata->LEN5 = len5;
+
+ skel->bss->pid = getpid();
+
+ err = test_ringbuf_overwrite_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_ringbuf_overwrite_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
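+	/* trigger the attached program, which records the reserve results and ring stats in bss */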
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->reserve1_fail, 0, "reserve 1");
+ ASSERT_EQ(skel->bss->reserve2_fail, 0, "reserve 2");
+ ASSERT_EQ(skel->bss->reserve3_fail, 1, "reserve 3");
+ ASSERT_EQ(skel->bss->reserve4_fail, 0, "reserve 4");
+ ASSERT_EQ(skel->bss->reserve5_fail, 0, "reserve 5");
+
+ ASSERT_EQ(skel->bss->ring_size, size, "check_ring_size");
+
+ expect_avail_data = len2 + len4 + len5 + 3 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->avail_data, expect_avail_data, "check_avail_size");
+
+ ASSERT_EQ(skel->bss->cons_pos, 0, "check_cons_pos");
+
+ expect_prod_pos = len1 + len2 + len4 + len5 + 4 * BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->prod_pos, expect_prod_pos, "check_prod_pos");
+
+ expect_over_pos = len1 + BPF_RINGBUF_HDR_SZ;
+ ASSERT_EQ(skel->bss->over_pos, expect_over_pos, "check_over_pos");
+
+ test_ringbuf_overwrite_lskel__detach(skel);
+cleanup:
+ test_ringbuf_overwrite_lskel__destroy(skel);
+}
+
void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
@@ -507,4 +570,6 @@ void test_ringbuf(void)
ringbuf_map_key_subtest();
if (test__start_subtest("ringbuf_write"))
ringbuf_write_subtest();
+ if (test__start_subtest("ringbuf_overwrite_mode"))
+ ringbuf_overwrite_mode_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 036d4760d2c1..3dbcc091f16c 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -41,11 +41,7 @@ static struct bpf_object *obj;
static __u32 index_zero;
static int epfd;
-static union sa46 {
- struct sockaddr_in6 v6;
- struct sockaddr_in v4;
- sa_family_t family;
-} srv_sa;
+static struct sockaddr_storage srv_sa;
#define RET_IF(condition, tag, format...) ({ \
if (CHECK_FAIL(condition)) { \
@@ -135,24 +131,24 @@ static int prepare_bpf_obj(void)
return 0;
}
-static void sa46_init_loopback(union sa46 *sa, sa_family_t family)
+static void ss_init_loopback(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_loopback;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_loopback;
else
- sa->v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
}
-static void sa46_init_inany(union sa46 *sa, sa_family_t family)
+static void ss_init_inany(struct sockaddr_storage *sa, sa_family_t family)
{
memset(sa, 0, sizeof(*sa));
- sa->family = family;
- if (sa->family == AF_INET6)
- sa->v6.sin6_addr = in6addr_any;
+ sa->ss_family = family;
+ if (sa->ss_family == AF_INET6)
+ ((struct sockaddr_in6 *)sa)->sin6_addr = in6addr_any;
else
- sa->v4.sin_addr.s_addr = INADDR_ANY;
+ ((struct sockaddr_in *)sa)->sin_addr.s_addr = INADDR_ANY;
}
static int read_int_sysctl(const char *sysctl)
@@ -228,7 +224,7 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
int cli_fd)
{
struct data_check expected = {}, result;
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
socklen_t addrlen;
int err;
@@ -251,26 +247,32 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
}
if (family == AF_INET6) {
+ struct sockaddr_in6 *srv_v6 = (struct sockaddr_in6 *)&srv_sa;
+ struct sockaddr_in6 *cli_v6 = (struct sockaddr_in6 *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IPV6);
- expected.bind_inany = !srv_sa.v6.sin6_addr.s6_addr32[3] &&
- !srv_sa.v6.sin6_addr.s6_addr32[2] &&
- !srv_sa.v6.sin6_addr.s6_addr32[1] &&
- !srv_sa.v6.sin6_addr.s6_addr32[0];
+ expected.bind_inany = !srv_v6->sin6_addr.s6_addr32[3] &&
+ !srv_v6->sin6_addr.s6_addr32[2] &&
+ !srv_v6->sin6_addr.s6_addr32[1] &&
+ !srv_v6->sin6_addr.s6_addr32[0];
- memcpy(&expected.skb_addrs[0], cli_sa.v6.sin6_addr.s6_addr32,
- sizeof(cli_sa.v6.sin6_addr));
+ memcpy(&expected.skb_addrs[0], cli_v6->sin6_addr.s6_addr32,
+ sizeof(cli_v6->sin6_addr));
memcpy(&expected.skb_addrs[4], &in6addr_loopback,
sizeof(in6addr_loopback));
- expected.skb_ports[0] = cli_sa.v6.sin6_port;
- expected.skb_ports[1] = srv_sa.v6.sin6_port;
+ expected.skb_ports[0] = cli_v6->sin6_port;
+ expected.skb_ports[1] = srv_v6->sin6_port;
} else {
+ struct sockaddr_in *srv_v4 = (struct sockaddr_in *)&srv_sa;
+ struct sockaddr_in *cli_v4 = (struct sockaddr_in *)&cli_sa;
+
expected.eth_protocol = htons(ETH_P_IP);
- expected.bind_inany = !srv_sa.v4.sin_addr.s_addr;
+ expected.bind_inany = !srv_v4->sin_addr.s_addr;
- expected.skb_addrs[0] = cli_sa.v4.sin_addr.s_addr;
+ expected.skb_addrs[0] = cli_v4->sin_addr.s_addr;
expected.skb_addrs[1] = htonl(INADDR_LOOPBACK);
- expected.skb_ports[0] = cli_sa.v4.sin_port;
- expected.skb_ports[1] = srv_sa.v4.sin_port;
+ expected.skb_ports[0] = cli_v4->sin_port;
+ expected.skb_ports[1] = srv_v4->sin_port;
}
if (memcmp(&result, &expected, offsetof(struct data_check,
@@ -364,16 +366,15 @@ static void check_results(void)
static int send_data(int type, sa_family_t family, void *data, size_t len,
enum result expected)
{
- union sa46 cli_sa;
+ struct sockaddr_storage cli_sa;
int fd, err;
fd = socket(family, type, 0);
RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno);
- sa46_init_loopback(&cli_sa, family);
+ ss_init_loopback(&cli_sa, family);
err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa));
RET_ERR(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno);
-
err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa,
sizeof(srv_sa));
RET_ERR(err != len && expected >= PASS,
@@ -589,9 +590,9 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
socklen_t addrlen;
if (inany)
- sa46_init_inany(&srv_sa, family);
+ ss_init_inany(&srv_sa, family);
else
- sa46_init_loopback(&srv_sa, family);
+ ss_init_loopback(&srv_sa, family);
addrlen = sizeof(srv_sa);
/*
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 1702aa592c2c..7ac4d5a488aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -206,6 +206,11 @@ destroy_skel:
skel_open_load_failure:
close(pipe_c2p[0]);
close(pipe_p2c[1]);
+ /*
+ * Child is either about to exit cleanly or stuck in case of errors.
+ * Nudge it to exit.
+ */
+ kill(pid, SIGKILL);
wait(NULL);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
index e12255121c15..e4dac529d424 100644
--- a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -202,7 +202,7 @@ err_out:
void test_setget_sockopt(void)
{
cg_fd = test__join_cgroup(CG_NAME);
- if (cg_fd < 0)
+ if (!ASSERT_OK_FD(cg_fd, "join cgroup"))
return;
if (create_netns())
diff --git a/tools/testing/selftests/bpf/prog_tests/sha256.c b/tools/testing/selftests/bpf/prog_tests/sha256.c
new file mode 100644
index 000000000000..604a0b1423d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sha256.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "bpf/libbpf_internal.h"
+
+#define MAX_LEN 4096
+
+/* Test libbpf_sha256() for all lengths from 0 to MAX_LEN inclusively. */
+void test_sha256(void)
+{
+ /*
+ * The correctness of this value was verified by running this test with
+ * libbpf_sha256() replaced by OpenSSL's SHA256().
+ */
+ static const __u8 expected_digest_of_digests[SHA256_DIGEST_LENGTH] = {
+ 0x62, 0x30, 0x0e, 0x1d, 0xea, 0x7f, 0xc4, 0x74,
+ 0xfd, 0x8e, 0x64, 0x0b, 0xd8, 0x5f, 0xea, 0x04,
+ 0xf3, 0xef, 0x77, 0x42, 0xc2, 0x01, 0xb8, 0x90,
+ 0x6e, 0x19, 0x91, 0x1b, 0xca, 0xb3, 0x28, 0x42,
+ };
+ __u64 seed = 0;
+ __u8 *data = NULL, *digests = NULL;
+ __u8 digest_of_digests[SHA256_DIGEST_LENGTH];
+ size_t i;
+
+ data = malloc(MAX_LEN);
+ if (!ASSERT_OK_PTR(data, "malloc"))
+ goto out;
+ digests = malloc((MAX_LEN + 1) * SHA256_DIGEST_LENGTH);
+ if (!ASSERT_OK_PTR(digests, "malloc"))
+ goto out;
+
+ /* Generate MAX_LEN bytes of "random" data deterministically. */
+ for (i = 0; i < MAX_LEN; i++) {
+ seed = (seed * 25214903917 + 11) & ((1ULL << 48) - 1);
+ data[i] = (__u8)(seed >> 16);
+ }
+
+ /* Calculate a digest for each length 0 through MAX_LEN inclusively. */
+ for (i = 0; i <= MAX_LEN; i++)
+ libbpf_sha256(data, i, &digests[i * SHA256_DIGEST_LENGTH]);
+
+ /* Calculate and verify the digest of all the digests. */
+ libbpf_sha256(digests, (MAX_LEN + 1) * SHA256_DIGEST_LENGTH,
+ digest_of_digests);
+ ASSERT_MEMEQ(digest_of_digests, expected_digest_of_digests,
+ SHA256_DIGEST_LENGTH, "digest_of_digests");
+out:
+ free(data);
+ free(digests);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 0b9bd1d6f7cc..10a0ab954b8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -37,8 +37,10 @@ configure_stack(void)
tc = popen("tc -V", "r");
if (CHECK_FAIL(!tc))
return false;
- if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc))) {
+ pclose(tc);
return false;
+ }
if (strstr(tc_version, ", libbpf "))
prog = "test_sk_assign_libbpf.bpf.o";
else
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..e4940583924b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_bypass_prot_mem.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include <test_progs.h>
+#include "sk_bypass_prot_mem.skel.h"
+#include "network_helpers.h"
+
+#define NR_PAGES 32
+#define NR_SOCKETS 2
+#define BUF_TOTAL (NR_PAGES * 4096 / NR_SOCKETS)
+#define BUF_SINGLE 1024
+#define NR_SEND (BUF_TOTAL / BUF_SINGLE)
+
+struct test_case {
+ char name[8];
+ int family;
+ int type;
+ int (*create_sockets)(struct test_case *test_case, int sk[], int len);
+ long (*get_memory_allocated)(struct test_case *test_case, struct sk_bypass_prot_mem *skel);
+};
+
+static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int server, i, err = 0;
+
+ server = start_server(test_case->family, test_case->type, NULL, 0, 0);
+	if (!ASSERT_GE(server, 0, "start_server"))
+ return server;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = connect_to_fd(server, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "connect_to_fd");
+ err = sk[i];
+ break;
+ }
+
+ sk[i + 1] = accept(server, NULL, NULL);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "accept");
+ err = sk[i + 1];
+ break;
+ }
+ }
+
+ close(server);
+
+ return err;
+}
+
+static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
+{
+ int i, j, err, rcvbuf = BUF_TOTAL;
+
+ /* Keep for-loop so we can change NR_SOCKETS easily. */
+ for (i = 0; i < len; i += 2) {
+ sk[i] = start_server(test_case->family, test_case->type, NULL, 0, 0);
+ if (sk[i] < 0) {
+ ASSERT_GE(sk[i], 0, "start_server");
+ return sk[i];
+ }
+
+ sk[i + 1] = connect_to_fd(sk[i], 0);
+ if (sk[i + 1] < 0) {
+ ASSERT_GE(sk[i + 1], 0, "connect_to_fd");
+ return sk[i + 1];
+ }
+
+ err = connect_fd_to_fd(sk[i], sk[i + 1], 0);
+ if (err) {
+ ASSERT_EQ(err, 0, "connect_fd_to_fd");
+ return err;
+ }
+
+ for (j = 0; j < 2; j++) {
+ err = setsockopt(sk[i + j], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
+ if (err) {
+ ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)");
+ return err;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static long get_memory_allocated(struct test_case *test_case,
+ bool *activated, long *memory_allocated)
+{
+ int sk;
+
+ *activated = true;
+
+ /* AF_INET and AF_INET6 share the same memory_allocated.
+ * tcp_init_sock() is called by AF_INET and AF_INET6,
+ * but udp_lib_init_sock() is inline.
+ */
+ sk = socket(AF_INET, test_case->type, 0);
+ if (!ASSERT_GE(sk, 0, "get_memory_allocated"))
+ return -1;
+
+ close(sk);
+
+ return *memory_allocated;
+}
+
+static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->tcp_activated,
+ &skel->bss->tcp_memory_allocated);
+}
+
+static long udp_get_memory_allocated(struct test_case *test_case, struct sk_bypass_prot_mem *skel)
+{
+ return get_memory_allocated(test_case,
+ &skel->bss->udp_activated,
+ &skel->bss->udp_memory_allocated);
+}
+
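+/* Send BUF_TOTAL bytes on each socket and check whether memory_allocated grew (no bypass) or stayed roughly flat (bypass). */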
+static int check_bypass(struct test_case *test_case,
+ struct sk_bypass_prot_mem *skel, bool bypass)
+{
+ char buf[BUF_SINGLE] = {};
+ long memory_allocated[2];
+ int sk[NR_SOCKETS];
+ int err, i, j;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++)
+ sk[i] = -1;
+
+ err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
+ if (err)
+ goto close;
+
+ memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);
+
+ /* allocate pages >= NR_PAGES */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = send(sk[i], buf, sizeof(buf), 0);
+
+			/* Avoid overly noisy logs when something fails. */
+ if (bytes != sizeof(buf)) {
+ ASSERT_EQ(bytes, sizeof(buf), "send");
+ if (bytes < 0) {
+ err = bytes;
+ goto drain;
+ }
+ }
+ }
+ }
+
+ memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);
+
+ if (bypass)
+ ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "bypass");
+ else
+ ASSERT_GT(memory_allocated[1], memory_allocated[0] + NR_PAGES, "no bypass");
+
+drain:
+ if (test_case->type == SOCK_DGRAM) {
+ /* UDP starts purging sk->sk_receive_queue after one RCU
+ * grace period, then udp_memory_allocated goes down,
+ * so drain the queue before close().
+ */
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ for (j = 0; j < NR_SEND; j++) {
+ int bytes = recv(sk[i], buf, 1, MSG_DONTWAIT | MSG_TRUNC);
+
+ if (bytes == sizeof(buf))
+ continue;
+ if (bytes != -1 || errno != EAGAIN)
+ PRINT_FAIL("bytes: %d, errno: %s\n", bytes, strerror(errno));
+ break;
+ }
+ }
+ }
+
+close:
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ if (sk[i] < 0)
+ break;
+
+ close(sk[i]);
+ }
+
+ return err;
+}
+
+static void run_test(struct test_case *test_case)
+{
+ struct sk_bypass_prot_mem *skel;
+ struct nstoken *nstoken;
+ int cgroup, err;
+
+ skel = sk_bypass_prot_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = sk_bypass_prot_mem__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto destroy_skel;
+
+ cgroup = test__join_cgroup("/sk_bypass_prot_mem");
+ if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
+ goto destroy_skel;
+
+ err = make_netns("sk_bypass_prot_mem");
+ if (!ASSERT_EQ(err, 0, "make_netns"))
+ goto close_cgroup;
+
+ nstoken = open_netns("sk_bypass_prot_mem");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto remove_netns;
+
+ err = check_bypass(test_case, skel, false);
+ if (!ASSERT_EQ(err, 0, "test_bypass(false)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "1");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ if (!ASSERT_EQ(err, 0, "test_bypass(true by sysctl)"))
+ goto close_netns;
+
+ err = write_sysctl("/proc/sys/net/core/bypass_prot_mem", "0");
+ if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
+ goto close_netns;
+
+ skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
+ if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
+ goto close_netns;
+
+ err = check_bypass(test_case, skel, true);
+ ASSERT_EQ(err, 0, "test_bypass(true by bpf)");
+
+close_netns:
+ close_netns(nstoken);
+remove_netns:
+ remove_netns("sk_bypass_prot_mem");
+close_cgroup:
+ close(cgroup);
+destroy_skel:
+ sk_bypass_prot_mem__destroy(skel);
+}
+
+static struct test_case test_cases[] = {
+ {
+ .name = "TCP ",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDP ",
+ .family = AF_INET,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+ {
+ .name = "TCPv6",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .create_sockets = tcp_create_sockets,
+ .get_memory_allocated = tcp_get_memory_allocated,
+ },
+ {
+ .name = "UDPv6",
+ .family = AF_INET6,
+ .type = SOCK_DGRAM,
+ .create_sockets = udp_create_sockets,
+ .get_memory_allocated = udp_get_memory_allocated,
+ },
+};
+
+void serial_test_sk_bypass_prot_mem(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (test__start_subtest(test_cases[i].name))
+ run_test(&test_cases[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 4be6fdb78c6a..594441acb707 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -116,6 +116,8 @@ static void test_snprintf_negative(void)
ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+ ASSERT_ERR(load_single_snprintf("%p%"), "invalid specifier 8");
+ ASSERT_ERR(load_single_snprintf("%s%"), "invalid specifier 9");
}
void test_snprintf(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index d56e18b25528..27781df8f2fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -1,20 +1,875 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2024 Meta
+#include <poll.h>
#include <test_progs.h>
#include "network_helpers.h"
#include "sock_iter_batch.skel.h"
#define TEST_NS "sock_iter_batch_netns"
+#define TEST_CHILD_NS "sock_iter_batch_child_netns"
+static const int init_batch_size = 16;
static const int nr_soreuse = 4;
+struct iter_out {
+ int idx;
+ __u64 cookie;
+} __packed;
+
+struct sock_count {
+ __u64 cookie;
+ int count;
+};
+
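+/* Record one sighting of @cookie in @counts; returns the updated count, or -1 if the array is full. */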
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+ int insert = -1;
+ int i = 0;
+
+ for (; i < counts_len; i++) {
+ if (!counts[i].cookie) {
+ insert = i;
+ } else if (counts[i].cookie == cookie) {
+ insert = i;
+ break;
+ }
+ }
+ if (insert < 0)
+ return insert;
+
+ counts[insert].cookie = cookie;
+ counts[insert].count++;
+
+ return counts[insert].count;
+}
+
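+/* Read up to @n records from the iterator (all remaining if @n < 0), tallying each cookie seen. */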
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+ int counts_len)
+{
+ struct iter_out out;
+ int nread = 1;
+ int i = 0;
+
+ for (; nread > 0 && (n < 0 || i < n); i++) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+ break;
+ ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+ }
+
+ ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+ return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+ &cookie_len), "getsockopt(SO_COOKIE)"))
+ return 0;
+ return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+ __u64 cookie = socket_cookie(fd);
+ int i = 0;
+
+ for (; cookie && i < counts_len; i++)
+ if (cookie == counts[i].cookie)
+ return true;
+
+ return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ if (was_seen(fds[i], counts, n))
+ return i;
+ return -1;
+}
+
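+/* Return the index in @fds of the socket that a fresh iterator reports in position @n, or -1. */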
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+ int i, nread, iter_fd;
+ int nth_sock_idx = -1;
+ struct iter_out out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ return -1;
+
+ for (; n >= 0; n--) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_GE(nread, 1, "nread"))
+ goto done;
+ }
+
+ for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+ if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+ nth_sock_idx = i;
+done:
+ close(iter_fd);
+ return nth_sock_idx;
+}
+
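+/* Destroy the socket @fd by running the iter_tcp_destroy program against its cookie. */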
+static void destroy(int fd)
+{
+ struct sock_iter_batch *skel = NULL;
+ __u64 cookie = socket_cookie(fd);
+ struct bpf_link *link = NULL;
+ int iter_fd = -1;
+ int nread;
+ __u64 out;
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ skel->rodata->destroy_cookie = cookie;
+
+ if (!ASSERT_OK(sock_iter_batch__load(skel), "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(skel->progs.iter_tcp_destroy, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ /* Delete matching socket. */
+ nread = read(iter_fd, &out, sizeof(out));
+ ASSERT_GE(nread, 0, "nread");
+ if (nread)
+ ASSERT_EQ(out, cookie, "cookie matches");
+done:
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+ close(fd);
+}
+
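+/* Return how many times @fd's cookie was counted in @counts, or 0 if it was never seen. */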
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+ __u64 cookie = socket_cookie(fd);
+ int count = 0;
+ int i = 0;
+
+ for (; cookie && !count && i < n; i++)
+ if (cookie == counts[i].cookie)
+ count = counts[i].count;
+
+ return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+ struct sock_count counts[], int counts_len)
+{
+ int seen_once = 0;
+ int seen_cnt;
+ int i = 0;
+
+ for (; i < fds_len; i++) {
+ /* Skip any sockets that were closed or that weren't seen
+ * exactly once.
+ */
+ if (fds[i] < 0)
+ continue;
+ seen_cnt = get_seen_count(fds[i], counts, counts_len);
+ if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+ seen_once++;
+ }
+
+ ASSERT_EQ(seen_once, n, "seen_once");
+}
+
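+/* Poll the listening sockets and accept a single pending connection. */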
+static int accept_from_one(struct pollfd *server_poll_fds,
+ int server_poll_fds_len)
+{
+ static const int poll_timeout_ms = 5000; /* 5s */
+ int ret;
+ int i;
+
+ ret = poll(server_poll_fds, server_poll_fds_len, poll_timeout_ms);
+ if (!ASSERT_EQ(ret, 1, "poll"))
+ return -1;
+
+ for (i = 0; i < server_poll_fds_len; i++)
+ if (server_poll_fds[i].revents & POLLIN)
+ return accept(server_poll_fds[i].fd, NULL, NULL);
+
+ return -1;
+}
+
+static int *connect_to_server(int family, int sock_type, const char *addr,
+ __u16 port, int nr_connects, int *server_fds,
+ int server_fds_len)
+{
+ struct pollfd *server_poll_fds = NULL;
+ int *established_socks = NULL;
+ int i;
+
+ server_poll_fds = calloc(server_fds_len, sizeof(*server_poll_fds));
+ if (!ASSERT_OK_PTR(server_poll_fds, "server_poll_fds"))
+ return NULL;
+
+ for (i = 0; i < server_fds_len; i++) {
+ server_poll_fds[i].fd = server_fds[i];
+ server_poll_fds[i].events = POLLIN;
+ }
+
+ i = 0;
+
+	established_socks = malloc(sizeof(*established_socks) * nr_connects * 2);
+ if (!ASSERT_OK_PTR(established_socks, "established_socks"))
+ goto error;
+
+ while (nr_connects--) {
+ established_socks[i] = connect_to_addr_str(family, sock_type,
+ addr, port, NULL);
+ if (!ASSERT_OK_FD(established_socks[i], "connect_to_addr_str"))
+ goto error;
+ i++;
+ established_socks[i] = accept_from_one(server_poll_fds,
+ server_fds_len);
+ if (!ASSERT_OK_FD(established_socks[i], "accept_from_one"))
+ goto error;
+ i++;
+ }
+
+ free(server_poll_fds);
+ return established_socks;
+error:
+ free_fds(established_socks, i);
+ free(server_poll_fds);
+ return NULL;
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_seen_socket(socks, counts, counts_len);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_seen_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Leave one established socket. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_unseen_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+	/* Make sure we saw one established socket. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(established_socks, established_socks_len,
+ link, listen_socks_len + 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+
+ destroy(established_socks[close_idx]);
+ established_socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx, i;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ for (i = 0; i < socks_len - 1; i++) {
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void remove_all_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *close_idx = NULL;
+ int i;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+	/* Make sure we saw one established socket. */
+ check_n_were_seen_once(established_socks, established_socks_len, 1,
+ counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ close_idx = malloc(sizeof(int) * (established_socks_len - 1));
+ if (!ASSERT_OK_PTR(close_idx, "close_idx malloc"))
+ return;
+ for (i = 0; i < established_socks_len - 1; i++) {
+ close_idx[i] = get_nth_socket(established_socks,
+ established_socks_len, link,
+ listen_socks_len + i);
+ if (!ASSERT_GE(close_idx[i], 0, "close_idx"))
+ return;
+ }
+
+ for (i = 0; i < established_socks_len - 1; i++) {
+ destroy(established_socks[close_idx[i]]);
+ established_socks[close_idx[i]] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+ free(close_idx);
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Double the number of sockets in the bucket. */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void add_some_established(int family, int sock_type, const char *addr,
+ __u16 port, int *listen_socks,
+ int listen_socks_len, int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts,
+ int counts_len, struct bpf_link *link,
+ int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through all listening sockets. */
+ read_n(iter_fd, listen_socks_len, counts, counts_len);
+
+ /* Make sure we saw all listening sockets exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+
+ /* Iterate through the first established_socks_len - 1 sockets. */
+ read_n(iter_fd, established_socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw established_socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len - 1, counts, counts_len);
+
+ /* Double the number of established sockets in the bucket. */
+ new_socks = connect_to_server(family, sock_type, addr, port,
+ established_socks_len / 2, listen_socks,
+ listen_socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "connect_to_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+done:
+ free_fds(new_socks, established_socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ int *established_socks, int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socket just to initialize the batch. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Double the number of sockets in the bucket to force a realloc on the
+ * next read.
+ */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket from the first set was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void force_realloc_established(int family, int sock_type,
+ const char *addr, __u16 port,
+ int *listen_socks, int listen_socks_len,
+ int *established_socks,
+ int established_socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ /* Iterate through all sockets to trigger a realloc. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket was seen exactly once. */
+ check_n_were_seen_once(listen_socks, listen_socks_len, listen_socks_len,
+ counts, counts_len);
+ check_n_were_seen_once(established_socks, established_socks_len,
+ established_socks_len, counts, counts_len);
+}
+
+struct test_case {
+ void (*test)(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, int *established_socks,
+ int established_socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd);
+ const char *description;
+ int ehash_buckets;
+ int connections;
+ int init_socks;
+ int max_socks;
+ int sock_type;
+ int family;
+};
+
+static struct test_case resume_tests[] = {
+ {
+ .description = "udp: resume after removing a seen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "udp: resume after removing one unseen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "udp: resume after removing all unseen sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "udp: resume after adding a few sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "udp: force a realloc to occur",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (listening)",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "tcp: force a realloc to occur (listening)",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_STREAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list and must therefore be picked up by the next batch,
+ * forcing a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+ {
+ .description = "tcp: resume after removing a seen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_seen_established,
+ },
+ {
+ .description = "tcp: resume after removing one unseen socket (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_unseen_established,
+ },
+ {
+ .description = "tcp: resume after removing all unseen sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = remove_all_established,
+ },
+ {
+ .description = "tcp: resume after adding a few sockets (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ .connections = nr_soreuse,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse * 3,
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = add_some_established,
+ },
+ {
+ .description = "tcp: force a realloc to occur (established)",
+ /* Force all established sockets into one bucket */
+ .ehash_buckets = 1,
+ /* Bucket size will need to double when going from listening to
+ * established sockets.
+ */
+ .connections = init_batch_size,
+ .init_socks = nr_soreuse,
+ /* Room for connect()ed and accept()ed sockets */
+ .max_socks = nr_soreuse + (init_batch_size * 2),
+ .sock_type = SOCK_STREAM,
+ .family = AF_INET6,
+ .test = force_realloc_established,
+ },
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+ struct sock_iter_batch *skel = NULL;
+ struct sock_count *counts = NULL;
+ static const __u16 port = 10001;
+ struct nstoken *nstoken = NULL;
+ struct bpf_link *link = NULL;
+ int *established_fds = NULL;
+ int err, iter_fd = -1;
+ const char *addr;
+ int *fds = NULL;
+
+ if (tc->ehash_buckets) {
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS(done, "sysctl -wq net.ipv4.tcp_child_ehash_entries=%d",
+ tc->ehash_buckets);
+ SYS(done, "ip netns add %s", TEST_CHILD_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_CHILD_NS);
+ nstoken = open_netns(TEST_CHILD_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_child_netns"))
+ goto done;
+ }
+
+ counts = calloc(tc->max_socks, sizeof(*counts));
+ if (!ASSERT_OK_PTR(counts, "counts"))
+ goto done;
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ /* Prepare a bucket of sockets in the kernel hashtable */
+ addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+ fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+ goto done;
+ if (tc->connections) {
+ established_fds = connect_to_server(tc->family, tc->sock_type,
+ addr, port,
+ tc->connections, fds,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(established_fds, "connect_to_server"))
+ goto done;
+ }
+ skel->rodata->ports[0] = 0;
+ skel->rodata->ports[1] = 0;
+ skel->rodata->sf = tc->family;
+ skel->rodata->ss = 0;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+ established_fds, tc->connections * 2, counts, tc->max_socks,
+ link, iter_fd);
+done:
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_CHILD_NS);
+ SYS_NOFAIL("sysctl -w net.ipv4.tcp_child_ehash_entries=0");
+ free(counts);
+ free_fds(fds, tc->init_socks);
+ free_fds(established_fds, tc->connections * 2);
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+ if (test__start_subtest(resume_tests[i].description))
+ do_resume_test(&resume_tests[i]);
+ }
+}
+
static void do_test(int sock_type, bool onebyone)
{
int err, i, nread, to_read, total_read, iter_fd = -1;
- int first_idx, second_idx, indices[nr_soreuse];
+ struct iter_out outputs[nr_soreuse];
struct bpf_link *link = NULL;
struct sock_iter_batch *skel;
+ int first_idx, second_idx;
int *fds[2] = {};
skel = sock_iter_batch__open();
@@ -34,6 +889,9 @@ static void do_test(int sock_type, bool onebyone)
goto done;
skel->rodata->ports[i] = ntohs(local_port);
}
+ skel->rodata->sf = AF_INET6;
+ if (sock_type == SOCK_STREAM)
+ skel->rodata->ss = TCP_LISTEN;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -55,38 +913,38 @@ static void do_test(int sock_type, bool onebyone)
* from a bucket and leave one socket out from
* that bucket on purpose.
*/
- to_read = (nr_soreuse - 1) * sizeof(*indices);
+ to_read = (nr_soreuse - 1) * sizeof(*outputs);
total_read = 0;
first_idx = -1;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
if (first_idx == -1)
- first_idx = indices[0];
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], first_idx, "first_idx");
+ first_idx = outputs[0].idx;
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
} while (total_read < to_read);
- ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+ ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
ASSERT_EQ(total_read, to_read, "total_read");
free_fds(fds[first_idx], nr_soreuse);
fds[first_idx] = NULL;
/* Read the "whole" second bucket */
- to_read = nr_soreuse * sizeof(*indices);
+ to_read = nr_soreuse * sizeof(*outputs);
total_read = 0;
second_idx = !first_idx;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], second_idx, "second_idx");
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
} while (total_read <= to_read);
ASSERT_EQ(nread, 0, "nread");
/* Both so_reuseport ports should be in different buckets, so
@@ -128,6 +986,7 @@ void test_sock_iter_batch(void)
do_test(SOCK_DGRAM, true);
do_test(SOCK_DGRAM, false);
}
+ do_resume_tests();
close_netns(nstoken);
done:
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_helpers.h b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
new file mode 100644
index 000000000000..0d59503a0c73
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/socket_helpers.h
@@ -0,0 +1,473 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SOCKET_HELPERS__
+#define __SOCKET_HELPERS__
+
+#include <sys/un.h>
+#include <linux/vm_sockets.h>
+
+/* include/linux/net.h */
+#define SOCK_TYPE_MASK 0xf
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+/* include/linux/compiler_types.h */
+#if __STDC_VERSION__ < 202311L && !defined(auto)
+# define auto __auto_type
+#endif
+
+/* include/linux/cleanup.h */
+#define __get_and_null(p, nullvalue) \
+ ({ \
+ auto __ptr = &(p); \
+ auto __val = *__ptr; \
+ *__ptr = nullvalue; \
+ __val; \
+ })
+
+#define take_fd(fd) __get_and_null(fd, -EBADF)
+
+/* Wrappers that fail the test on error and report it. */
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+static inline void close_fd(int *fd)
+{
+ if (*fd >= 0)
+ xclose(*fd);
+}
+
+#define __close_fd __attribute__((cleanup(close_fd)))
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_unix(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_un *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->sun_family = AF_UNIX;
+ *len = sizeof(sa_family_t);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ case AF_UNIX:
+ init_addr_loopback_unix(ss, len);
+ return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = 0;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set wfds;
+ int r, eval;
+ socklen_t esize = sizeof(eval);
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+
+ r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+ if (r != 1)
+ return -1;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+ return -1;
+ if (eval != 0) {
+ errno = eval;
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+static inline int create_pair(int family, int sotype, int *p0, int *p1)
+{
+ __close_fd int s, c = -1, p = -1;
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err;
+
+ s = socket_loopback(family, sotype);
+ if (s < 0)
+ return s;
+
+ c = xsocket(family, sotype, 0);
+ if (c < 0)
+ return c;
+
+ init_addr_loopback(family, &addr, &len);
+ err = xbind(c, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ err = connect(c, sockaddr(&addr), len);
+ if (err) {
+ if (errno != EINPROGRESS) {
+ FAIL_ERRNO("connect");
+ return err;
+ }
+
+ err = poll_connect(c, IO_TIMEOUT_SEC);
+ if (err) {
+ FAIL_ERRNO("poll_connect");
+ return err;
+ }
+ }
+
+ switch (sotype & SOCK_TYPE_MASK) {
+ case SOCK_DGRAM:
+ err = xgetsockname(c, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ err = xconnect(s, sockaddr(&addr), len);
+ if (err)
+ return err;
+
+ *p0 = take_fd(s);
+ break;
+ case SOCK_STREAM:
+ case SOCK_SEQPACKET:
+ p = xaccept_nonblock(s, NULL, NULL);
+ if (p < 0)
+ return p;
+
+ *p0 = take_fd(p);
+ break;
+ default:
+ FAIL("Unsupported socket type %#x", sotype);
+ return -EOPNOTSUPP;
+ }
+
+ *p1 = take_fd(c);
+ return 0;
+}
+
+static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
+ int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+
+ return err;
+}
+
+static inline const char *socket_kind_to_str(int sock_fd)
+{
+ socklen_t opt_len;
+ int domain, type;
+
+ opt_len = sizeof(domain);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_DOMAIN, &domain, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_DOMAIN)");
+
+ opt_len = sizeof(type);
+ if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &type, &opt_len))
+ FAIL_ERRNO("getsockopt(SO_TYPE)");
+
+ switch (domain) {
+ case AF_INET:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp4";
+ case SOCK_DGRAM:
+ return "udp4";
+ }
+ break;
+ case AF_INET6:
+ switch (type) {
+ case SOCK_STREAM:
+ return "tcp6";
+ case SOCK_DGRAM:
+ return "udp6";
+ }
+ break;
+ case AF_UNIX:
+ switch (type) {
+ case SOCK_STREAM:
+ return "u_str";
+ case SOCK_DGRAM:
+ return "u_dgr";
+ case SOCK_SEQPACKET:
+ return "u_seq";
+ }
+ break;
+ case AF_VSOCK:
+ switch (type) {
+ case SOCK_STREAM:
+ return "v_str";
+ case SOCK_DGRAM:
+ return "v_dgr";
+ case SOCK_SEQPACKET:
+ return "v_seq";
+ }
+ break;
+ }
+
+ return "???";
+}
+
+#endif // __SOCKET_HELPERS__
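
A minimal usage sketch of the pair helpers above (illustrative only, not part of this patch; it assumes the header is included from a prog_tests/*.c test where test_progs.h provides the ASSERT_*() macros, and the function name is hypothetical):

static void example_echo_over_pair(void)
{
	int c = -1, p = -1;
	char buf = 'x';

	/* create_pair() hands back two connected loopback sockets;
	 * fd ownership is transferred to the caller via take_fd().
	 */
	if (!ASSERT_OK(create_pair(AF_INET, SOCK_STREAM, &c, &p), "create_pair"))
		return;

	/* The x*() wrappers report failures through FAIL_ERRNO() but
	 * still return the underlying result, so callers can branch on
	 * it as usual.
	 */
	if (xsend(c, &buf, 1, 0) == 1)
		xrecv_nonblock(p, &buf, 1, 0);

	xclose(c);
	xclose(p);
}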
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index a2041f8e32eb..1e3e4392dcca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -12,6 +12,7 @@
#include "test_sockmap_progs_query.skel.h"
#include "test_sockmap_pass_prog.skel.h"
#include "test_sockmap_drop_prog.skel.h"
+#include "test_sockmap_change_tail.skel.h"
#include "bpf_iter_sockmap.skel.h"
#include "sockmap_helpers.h"
@@ -108,6 +109,39 @@ out:
close(s);
}
+static void test_sockmap_vsock_delete_on_close(void)
+{
+ int map, c, p, err, zero = 0;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ if (xbpf_map_update_elem(map, &zero, &c, BPF_NOEXIST))
+ goto close_socks;
+
+ xclose(c);
+ xclose(p);
+
+ err = create_pair(AF_VSOCK, SOCK_STREAM, &c, &p);
+ if (!ASSERT_OK(err, "create_pair"))
+ goto close_map;
+
+ err = bpf_map_update_elem(map, &zero, &c, BPF_NOEXIST);
+ ASSERT_OK(err, "after close(), bpf_map_update");
+
+close_socks:
+ xclose(c);
+ xclose(p);
+close_map:
+ xclose(map);
+}
+
static void test_skmsg_helpers(enum bpf_map_type map_type)
{
struct test_skmsg_load_helpers *skel;
@@ -492,8 +526,8 @@ static void test_sockmap_skb_verdict_shutdown(void)
if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
goto out_close;
- n = recv(c1, &b, 1, SOCK_NONBLOCK);
- ASSERT_EQ(n, 0, "recv_timeout(fin)");
+ n = recv(c1, &b, 1, MSG_DONTWAIT);
+ ASSERT_EQ(n, 0, "recv(fin)");
out_close:
close(c1);
close(p1);
@@ -501,57 +535,6 @@ out:
test_sockmap_pass_prog__destroy(skel);
}
-static void test_sockmap_stream_pass(void)
-{
- int zero = 0, sent, recvd;
- int verdict, parser;
- int err, map;
- int c = -1, p = -1;
- struct test_sockmap_pass_prog *pass = NULL;
- char snd[256] = "0123456789";
- char rcv[256] = "0";
-
- pass = test_sockmap_pass_prog__open_and_load();
- verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
- parser = bpf_program__fd(pass->progs.prog_skb_parser);
- map = bpf_map__fd(pass->maps.sock_map_rx);
-
- err = bpf_prog_attach(parser, map, BPF_SK_SKB_STREAM_PARSER, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
- goto out;
-
- err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
- goto out;
-
- err = create_pair(AF_INET, SOCK_STREAM, &c, &p);
- if (err)
- goto out;
-
- /* sk_data_ready of 'p' will be replaced by strparser handler */
- err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
- if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
- goto out_close;
-
- /*
- * as 'prog_skb_parser' return the original skb len and
- * 'prog_skb_verdict' return SK_PASS, the kernel will just
- * pass it through to original socket 'p'
- */
- sent = xsend(c, snd, sizeof(snd), 0);
- ASSERT_EQ(sent, sizeof(snd), "xsend(c)");
-
- recvd = recv_timeout(p, rcv, sizeof(rcv), SOCK_NONBLOCK,
- IO_TIMEOUT_SEC);
- ASSERT_EQ(recvd, sizeof(rcv), "recv_timeout(p)");
-
-out_close:
- close(c);
- close(p);
-
-out:
- test_sockmap_pass_prog__destroy(pass);
-}
static void test_sockmap_skb_verdict_fionread(bool pass_prog)
{
@@ -598,7 +581,7 @@ static void test_sockmap_skb_verdict_fionread(bool pass_prog)
ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
/* On DROP test there will be no data to read */
if (pass_prog) {
- recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ recvd = recv_timeout(c1, &buf, sizeof(buf), MSG_DONTWAIT, IO_TIMEOUT_SEC);
ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
}
@@ -614,6 +597,54 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
+static void test_sockmap_skb_verdict_change_tail(void)
+{
+ struct test_sockmap_change_tail *skel;
+ int err, map, verdict;
+ int c1, p1, sent, recvd;
+ int zero = 0;
+ char buf[2];
+
+ skel = test_sockmap_change_tail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+ err = create_pair(AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+ sent = xsend(p1, "Tr", 2, 0);
+ ASSERT_EQ(sent, 2, "xsend(p1)");
+ recvd = recv(c1, buf, 2, 0);
+ ASSERT_EQ(recvd, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ sent = xsend(p1, "G", 1, 0);
+ ASSERT_EQ(sent, 1, "xsend(p1)");
+ recvd = recv(c1, buf, 2, 0);
+ ASSERT_EQ(recvd, 2, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ sent = xsend(p1, "E", 1, 0);
+ ASSERT_EQ(sent, 1, "xsend(p1)");
+ recvd = recv(c1, buf, 1, 0);
+ ASSERT_EQ(recvd, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_change_tail__destroy(skel);
+}
+
static void test_sockmap_skb_verdict_peek_helper(int map)
{
int err, c1, p1, zero = 0, sent, recvd, avail;
@@ -905,8 +936,10 @@ static void test_sockmap_same_sock(void)
err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
- if (err)
+ if (err) {
+ close(tcp);
goto out;
+ }
for (i = 0; i < 2; i++) {
err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY);
@@ -925,24 +958,98 @@ static void test_sockmap_same_sock(void)
ASSERT_OK(err, "bpf_map_update_elem(tcp)");
}
+ close(tcp);
err = bpf_map_delete_elem(map, &zero);
- ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ ASSERT_ERR(err, "bpf_map_delete_elem(entry)");
close(stream[0]);
close(stream[1]);
out:
close(dgram);
- close(tcp);
close(udp);
test_sockmap_pass_prog__destroy(skel);
}
+static void test_sockmap_skb_verdict_vsock_poll(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int err, map, conn, peer;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ char buf = 'x';
+ int zero = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (create_pair(AF_VSOCK, SOCK_STREAM, &conn, &peer))
+ goto destroy;
+
+ prog = skel->progs.prog_skb_verdict;
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto close;
+
+ err = bpf_map_update_elem(map, &zero, &conn, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto detach;
+
+ if (xsend(peer, &buf, 1, 0) != 1)
+ goto detach;
+
+ err = poll_read(conn, IO_TIMEOUT_SEC);
+ if (!ASSERT_OK(err, "poll"))
+ goto detach;
+
+ if (xrecv_nonblock(conn, &buf, 1, 0) != 1)
+ FAIL("xrecv_nonblock");
+detach:
+ bpf_link__detach(link);
+close:
+ xclose(conn);
+ xclose(peer);
+destroy:
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_vsock_unconnected(void)
+{
+ struct sockaddr_storage addr;
+ int map, s, zero = 0;
+ socklen_t alen;
+
+ map = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
+ if (!ASSERT_OK_FD(map, "bpf_map_create"))
+ return;
+
+ s = xsocket(AF_VSOCK, SOCK_STREAM, 0);
+ if (s < 0)
+ goto close_map;
+
+ /* Fail connect(), but trigger transport assignment. */
+ init_addr_loopback(AF_VSOCK, &addr, &alen);
+ if (!ASSERT_ERR(connect(s, sockaddr(&addr), alen), "connect"))
+ goto close_sock;
+
+ ASSERT_ERR(bpf_map_update_elem(map, &zero, &s, BPF_ANY), "map_update");
+
+close_sock:
+ xclose(s);
+close_map:
+ xclose(map);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash create_update_free"))
test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap vsock delete on close"))
+ test_sockmap_vsock_delete_on_close();
if (test__start_subtest("sockmap sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
@@ -975,12 +1082,12 @@ void test_sockmap_basic(void)
test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
if (test__start_subtest("sockmap skb_verdict shutdown"))
test_sockmap_skb_verdict_shutdown();
- if (test__start_subtest("sockmap stream parser and verdict pass"))
- test_sockmap_stream_pass();
if (test__start_subtest("sockmap skb_verdict fionread"))
test_sockmap_skb_verdict_fionread(true);
if (test__start_subtest("sockmap skb_verdict fionread on drop"))
test_sockmap_skb_verdict_fionread(false);
+ if (test__start_subtest("sockmap skb_verdict change tail"))
+ test_sockmap_skb_verdict_change_tail();
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
test_sockmap_skb_verdict_peek();
if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
@@ -997,4 +1104,8 @@ void test_sockmap_basic(void)
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict vsock poll"))
+ test_sockmap_skb_verdict_vsock_poll();
+ if (test__start_subtest("sockmap vsock unconnected"))
+ test_sockmap_vsock_unconnected();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
index 38e35c72bdaa..d815efac52fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -1,143 +1,19 @@
#ifndef __SOCKMAP_HELPERS__
#define __SOCKMAP_HELPERS__
-#include <linux/vm_sockets.h>
+#include "socket_helpers.h"
-/* include/linux/net.h */
-#define SOCK_TYPE_MASK 0xf
-
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
#define MAX_TEST_NAME 80
-/* workaround for older vm_sockets.h */
-#ifndef VMADDR_CID_LOCAL
-#define VMADDR_CID_LOCAL 1
-#endif
+#define u32(v) ((u32){(v)})
+#define u64(v) ((u64){(v)})
#define __always_unused __attribute__((__unused__))
-/* include/linux/cleanup.h */
-#define __get_and_null(p, nullvalue) \
- ({ \
- __auto_type __ptr = &(p); \
- __auto_type __val = *__ptr; \
- *__ptr = nullvalue; \
- __val; \
- })
-
-#define take_fd(fd) __get_and_null(fd, -EBADF)
-
-#define _FAIL(errnum, fmt...) \
- ({ \
- error_at_line(0, (errnum), __func__, __LINE__, fmt); \
- CHECK_FAIL(true); \
- })
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
- ({ \
- char __buf[MAX_STRERR_LEN]; \
- libbpf_strerror((err), __buf, sizeof(__buf)); \
- FAIL("%s: %s", (msg), __buf); \
- })
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
- ({ \
- int __ret = \
- accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("accept"); \
- __ret; \
- })
-
-#define xbind(fd, addr, len) \
- ({ \
- int __ret = bind((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("bind"); \
- __ret; \
- })
-
-#define xclose(fd) \
- ({ \
- int __ret = close((fd)); \
- if (__ret == -1) \
- FAIL_ERRNO("close"); \
- __ret; \
- })
-
-#define xconnect(fd, addr, len) \
- ({ \
- int __ret = connect((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("connect"); \
- __ret; \
- })
-
-#define xgetsockname(fd, addr, len) \
- ({ \
- int __ret = getsockname((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockname"); \
- __ret; \
- })
-
-#define xgetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = getsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xlisten(fd, backlog) \
- ({ \
- int __ret = listen((fd), (backlog)); \
- if (__ret == -1) \
- FAIL_ERRNO("listen"); \
- __ret; \
- })
-
-#define xsetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = setsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("setsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xsend(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = send((fd), (buf), (len), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("send"); \
- __ret; \
- })
-
-#define xrecv_nonblock(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
- IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("recv"); \
- __ret; \
- })
-
-#define xsocket(family, sotype, flags) \
- ({ \
- int __ret = socket(family, sotype, flags); \
- if (__ret == -1) \
- FAIL_ERRNO("socket"); \
- __ret; \
- })
-
#define xbpf_map_delete_elem(fd, key) \
({ \
int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_delete"); \
__ret; \
})
@@ -145,7 +21,7 @@
#define xbpf_map_lookup_elem(fd, key, val) \
({ \
int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_lookup"); \
__ret; \
})
@@ -153,7 +29,7 @@
#define xbpf_map_update_elem(fd, key, val, flags) \
({ \
int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("map_update"); \
__ret; \
})
@@ -162,7 +38,7 @@
({ \
int __ret = \
bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_attach(" #type ")"); \
__ret; \
})
@@ -170,7 +46,7 @@
#define xbpf_prog_detach2(prog, target, type) \
({ \
int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret < 0) \
+ if (__ret < 0) \
FAIL_ERRNO("prog_detach2(" #type ")"); \
__ret; \
})
@@ -193,277 +69,15 @@
__ret; \
})
-static inline void close_fd(int *fd)
-{
- if (*fd >= 0)
- xclose(*fd);
-}
-
-#define __close_fd __attribute__((cleanup(close_fd)))
-
-static inline int poll_connect(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set wfds;
- int r, eval;
- socklen_t esize = sizeof(eval);
-
- FD_ZERO(&wfds);
- FD_SET(fd, &wfds);
-
- r = select(fd + 1, NULL, &wfds, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
- if (r != 1)
- return -1;
-
- if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
- return -1;
- if (eval != 0) {
- errno = eval;
- return -1;
- }
-
- return 0;
-}
-
-static inline int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
+static inline int add_to_sockmap(int mapfd, int fd1, int fd2)
{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
-static inline void init_addr_loopback4(struct sockaddr_storage *ss,
- socklen_t *len)
-{
- struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
- addr4->sin_family = AF_INET;
- addr4->sin_port = 0;
- addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- *len = sizeof(*addr4);
-}
-
-static inline void init_addr_loopback6(struct sockaddr_storage *ss,
- socklen_t *len)
-{
- struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
- addr6->sin6_family = AF_INET6;
- addr6->sin6_port = 0;
- addr6->sin6_addr = in6addr_loopback;
- *len = sizeof(*addr6);
-}
-
-static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
- socklen_t *len)
-{
- struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
-
- addr->svm_family = AF_VSOCK;
- addr->svm_port = VMADDR_PORT_ANY;
- addr->svm_cid = VMADDR_CID_LOCAL;
- *len = sizeof(*addr);
-}
-
-static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
- socklen_t *len)
-{
- switch (family) {
- case AF_INET:
- init_addr_loopback4(ss, len);
- return;
- case AF_INET6:
- init_addr_loopback6(ss, len);
- return;
- case AF_VSOCK:
- init_addr_loopback_vsock(ss, len);
- return;
- default:
- FAIL("unsupported address family %d", family);
- }
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
- return (struct sockaddr *)ss;
-}
-
-static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
-{
- u64 value;
- u32 key;
int err;
- key = 0;
- value = fd1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1), BPF_NOEXIST);
if (err)
return err;
- key = 1;
- value = fd2;
- return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
-}
-
-static inline int enable_reuseport(int s, int progfd)
-{
- int err, one = 1;
-
- err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
- if (err)
- return -1;
- err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
- sizeof(progfd));
- if (err)
- return -1;
-
- return 0;
-}
-
-static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
- struct sockaddr_storage addr;
- socklen_t len = 0;
- int err, s;
-
- init_addr_loopback(family, &addr, &len);
-
- s = xsocket(family, sotype, 0);
- if (s == -1)
- return -1;
-
- if (progfd >= 0)
- enable_reuseport(s, progfd);
-
- err = xbind(s, sockaddr(&addr), len);
- if (err)
- goto close;
-
- if (sotype & SOCK_DGRAM)
- return s;
-
- err = xlisten(s, SOMAXCONN);
- if (err)
- goto close;
-
- return s;
-close:
- xclose(s);
- return -1;
-}
-
-static inline int socket_loopback(int family, int sotype)
-{
- return socket_loopback_reuseport(family, sotype, -1);
-}
-
-static inline int create_pair(int family, int sotype, int *p0, int *p1)
-{
- __close_fd int s, c = -1, p = -1;
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int err;
-
- s = socket_loopback(family, sotype);
- if (s < 0)
- return s;
-
- err = xgetsockname(s, sockaddr(&addr), &len);
- if (err)
- return err;
-
- c = xsocket(family, sotype, 0);
- if (c < 0)
- return c;
-
- err = connect(c, sockaddr(&addr), len);
- if (err) {
- if (errno != EINPROGRESS) {
- FAIL_ERRNO("connect");
- return err;
- }
-
- err = poll_connect(c, IO_TIMEOUT_SEC);
- if (err) {
- FAIL_ERRNO("poll_connect");
- return err;
- }
- }
-
- switch (sotype & SOCK_TYPE_MASK) {
- case SOCK_DGRAM:
- err = xgetsockname(c, sockaddr(&addr), &len);
- if (err)
- return err;
-
- err = xconnect(s, sockaddr(&addr), len);
- if (err)
- return err;
-
- *p0 = take_fd(s);
- break;
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- p = xaccept_nonblock(s, NULL, NULL);
- if (p < 0)
- return p;
-
- *p0 = take_fd(p);
- break;
- default:
- FAIL("Unsupported socket type %#x", sotype);
- return -EOPNOTSUPP;
- }
-
- *p1 = take_fd(c);
- return 0;
-}
-
-static inline int create_socket_pairs(int family, int sotype, int *c0, int *c1,
- int *p0, int *p1)
-{
- int err;
-
- err = create_pair(family, sotype, c0, p0);
- if (err)
- return err;
-
- err = create_pair(family, sotype, c1, p1);
- if (err) {
- close(*c0);
- close(*p0);
- }
-
- return err;
+ return xbpf_map_update_elem(mapfd, &u32(1), &u64(fd2), BPF_NOEXIST);
}
#endif // __SOCKMAP_HELPERS__
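
For reference, the u32()/u64() compound-literal macros above let add_to_sockmap() pass key/value addresses inline; a rough equivalent of one such update written with named locals (a sketch for illustration, not part of the patch; the helper name is hypothetical) is:

static inline int add_to_sockmap_verbose(int mapfd, int fd1)
{
	u32 key = 0;
	u64 value = fd1;

	/* Same effect as xbpf_map_update_elem(mapfd, &u32(0), &u64(fd1),
	 * BPF_NOEXIST): the compound literals simply avoid the named
	 * temporaries.
	 */
	return xbpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
}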
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 2d0796314862..b87e7f39e15a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -3,77 +3,62 @@
/*
* Tests for sockmap/sockhash holding kTLS sockets.
*/
-
+#include <error.h>
#include <netinet/tcp.h>
+#include <linux/tls.h>
#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_ktls.skel.h"
#define MAX_TEST_NAME 80
#define TCP_ULP 31
-static int tcp_server(int family)
+static int init_ktls_pairs(int c, int p)
{
- int err, s;
-
- s = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(s, 0, "socket"))
- return -1;
-
- err = listen(s, SOMAXCONN);
- if (!ASSERT_OK(err, "listen"))
- return -1;
-
- return s;
-}
+ int err;
+ struct tls12_crypto_info_aes_gcm_128 crypto_rx;
+ struct tls12_crypto_info_aes_gcm_128 crypto_tx;
-static int disconnect(int fd)
-{
- struct sockaddr unspec = { AF_UNSPEC };
+ err = setsockopt(c, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
- return connect(fd, &unspec, sizeof(unspec));
+ err = setsockopt(p, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto out;
+
+ memset(&crypto_rx, 0, sizeof(crypto_rx));
+ memset(&crypto_tx, 0, sizeof(crypto_tx));
+ crypto_rx.info.version = TLS_1_2_VERSION;
+ crypto_tx.info.version = TLS_1_2_VERSION;
+ crypto_rx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ crypto_tx.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ err = setsockopt(c, SOL_TLS, TLS_TX, &crypto_tx, sizeof(crypto_tx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_TX)"))
+ goto out;
+
+ err = setsockopt(p, SOL_TLS, TLS_RX, &crypto_rx, sizeof(crypto_rx));
+ if (!ASSERT_OK(err, "setsockopt(TLS_RX)"))
+ goto out;
+ return 0;
+out:
+ return -1;
}
-/* Disconnect (unhash) a kTLS socket after removing it from sockmap. */
-static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
+static int create_ktls_pairs(int family, int sotype, int *c, int *p)
{
- struct sockaddr_storage addr = {0};
- socklen_t len = sizeof(addr);
- int err, cli, srv, zero = 0;
-
- srv = tcp_server(family);
- if (srv == -1)
- return;
-
- err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (!ASSERT_OK(err, "getsockopt"))
- goto close_srv;
-
- cli = socket(family, SOCK_STREAM, 0);
- if (!ASSERT_GE(cli, 0, "socket"))
- goto close_srv;
+ int err;
- err = connect(cli, (struct sockaddr *)&addr, len);
- if (!ASSERT_OK(err, "connect"))
- goto close_cli;
-
- err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (!ASSERT_OK(err, "bpf_map_update_elem"))
- goto close_cli;
-
- err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
- goto close_cli;
-
- err = bpf_map_delete_elem(map, &zero);
- if (!ASSERT_OK(err, "bpf_map_delete_elem"))
- goto close_cli;
-
- err = disconnect(cli);
- ASSERT_OK(err, "disconnect");
+ err = create_pair(family, sotype, c, p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ return -1;
-close_cli:
- close(cli);
-close_srv:
- close(srv);
+ err = init_ktls_pairs(*c, *p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ return -1;
+ return 0;
}
static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
@@ -146,6 +131,278 @@ static const char *fmt_test_name(const char *subtest_name, int family,
return test_name;
}
+static void test_sockmap_ktls_offload(int family, int sotype)
+{
+ int err;
+ int c = 0, p = 0, sent, recvd;
+ char msg[12] = "hello world\0";
+ char rcv[13];
+
+ err = create_ktls_pairs(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_ktls_pairs()"))
+ goto out;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_OK(err, "send(msg)"))
+ goto out;
+
+ recvd = recv(p, rcv, sizeof(rcv), 0);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sent, "length mismatch"))
+ goto out;
+
+ ASSERT_OK(memcmp(msg, rcv, sizeof(msg)), "data mismatch");
+
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+}
+
+static void test_sockmap_ktls_tx_cork(int family, int sotype, bool push)
+{
+ int err, off;
+ int i, j;
+ int start_push = 0, push_len = 0;
+ int c = 0, p = 0, one = 1, sent, recvd;
+ int prog_fd, map_fd;
+ char msg[12] = "hello world\0";
+ char rcv[20] = {0};
+ struct test_sockmap_ktls *skel;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ skel->bss->cork_byte = sizeof(msg);
+ if (push) {
+ start_push = 1;
+ push_len = 2;
+ }
+ skel->bss->push_start = start_push;
+ skel->bss->push_end = push_len;
+
+ off = sizeof(msg) / 2;
+ sent = send(c, msg, off, 0);
+ if (!ASSERT_EQ(sent, off, "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "expected no data"))
+ goto out;
+
+ /* send remaining msg */
+ sent = send(c, msg + off, sizeof(msg) - off, 0);
+ if (!ASSERT_EQ(sent, sizeof(msg) - off, "send remaining data"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_OK(err, "recv(msg)") ||
+ !ASSERT_EQ(recvd, sizeof(msg) + push_len, "check length mismatch"))
+ goto out;
+
+ for (i = 0, j = 0; i < recvd;) {
+ /* skip checking the data that has been pushed in */
+ if (i >= start_push && i <= start_push + push_len - 1) {
+ i++;
+ continue;
+ }
+ if (!ASSERT_EQ(rcv[i], msg[j], "data mismatch"))
+ goto out;
+ i++;
+ j++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_no_buf(int family, int sotype, bool push)
+{
+ int c = -1, p = -1, one = 1, two = 2;
+ struct test_sockmap_ktls *skel;
+ unsigned char *data = NULL;
+ struct msghdr msg = {0};
+ struct iovec iov[2];
+ int prog_fd, map_fd;
+ int txrx_buf = 1024;
+ int iov_length = 8192;
+ int err;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ err = setsockopt(c, SOL_SOCKET, SO_RCVBUFFORCE, &txrx_buf, sizeof(int));
+ err |= setsockopt(p, SOL_SOCKET, SO_SNDBUFFORCE, &txrx_buf, sizeof(int));
+ if (!ASSERT_OK(err, "set buf limit"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy_redir);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &two, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out;
+
+ skel->bss->apply_bytes = 1024;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ data = calloc(iov_length, sizeof(char));
+ if (!data)
+ goto out;
+
+ iov[0].iov_base = data;
+ iov[0].iov_len = iov_length;
+ iov[1].iov_base = data;
+ iov[1].iov_len = iov_length;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 2;
+
+ for (;;) {
+ err = sendmsg(c, &msg, MSG_DONTWAIT);
+ if (err <= 0)
+ break;
+ }
+
+out:
+ if (data)
+ free(data);
+ if (c != -1)
+ close(c);
+ if (p != -1)
+ close(p);
+
+ test_sockmap_ktls__destroy(skel);
+}
+
+static void test_sockmap_ktls_tx_pop(int family, int sotype)
+{
+ char msg[37] = "0123456789abcdefghijklmnopqrstuvwxyz\0";
+ int c = 0, p = 0, one = 1, sent, recvd;
+ struct test_sockmap_ktls *skel;
+ int prog_fd, map_fd;
+ char rcv[50] = {0};
+ int err;
+ int i, m, r;
+
+ skel = test_sockmap_ktls__open_and_load();
+ if (!ASSERT_TRUE(skel, "open ktls skel"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (!ASSERT_OK(err, "create_pair()"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.prog_sk_policy);
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(prog_fd, map_fd, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach sk msg"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &one, &c, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c)"))
+ goto out;
+
+ err = init_ktls_pairs(c, p);
+ if (!ASSERT_OK(err, "init_ktls_pairs(c, p)"))
+ goto out;
+
+ struct {
+ int pop_start;
+ int pop_len;
+ } pop_policy[] = {
+ /* trim the start */
+ {0, 2},
+ {0, 10},
+ {1, 2},
+ {1, 10},
+ /* trim the end */
+ {35, 2},
+ /* New entries should be added before this line */
+ {-1, -1},
+ };
+
+ i = 0;
+ while (pop_policy[i].pop_start >= 0) {
+ skel->bss->pop_start = pop_policy[i].pop_start;
+ skel->bss->pop_end = pop_policy[i].pop_len;
+
+ sent = send(c, msg, sizeof(msg), 0);
+ if (!ASSERT_EQ(sent, sizeof(msg), "send(msg)"))
+ goto out;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(recvd, sizeof(msg) - pop_policy[i].pop_len, "pop len mismatch"))
+ goto out;
+
+ /* verify the data
+ * msg: 0123456789a bcdefghij klmnopqrstuvwxyz
+ * | |
+ * popped data
+ */
+ for (m = 0, r = 0; m < sizeof(msg);) {
+ /* skip checking the data that has been popped */
+ if (m >= pop_policy[i].pop_start &&
+ m <= pop_policy[i].pop_start + pop_policy[i].pop_len - 1) {
+ m++;
+ continue;
+ }
+
+ if (!ASSERT_EQ(msg[m], rcv[r], "data mismatch"))
+ goto out;
+ m++;
+ r++;
+ }
+ i++;
+ }
+out:
+ if (c)
+ close(c);
+ if (p)
+ close(p);
+ test_sockmap_ktls__destroy(skel);
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
int map;
@@ -154,18 +411,32 @@ static void run_tests(int family, enum bpf_map_type map_type)
if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
- test_sockmap_ktls_disconnect_after_delete(family, map);
if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
+static void run_ktls_test(int family, int sotype)
+{
+ if (test__start_subtest("tls simple offload"))
+ test_sockmap_ktls_offload(family, sotype);
+ if (test__start_subtest("tls tx cork"))
+ test_sockmap_ktls_tx_cork(family, sotype, false);
+ if (test__start_subtest("tls tx cork with push"))
+ test_sockmap_ktls_tx_cork(family, sotype, true);
+ if (test__start_subtest("tls tx egress with no buf"))
+ test_sockmap_ktls_tx_no_buf(family, sotype, true);
+ if (test__start_subtest("tls tx with pop"))
+ test_sockmap_ktls_tx_pop(family, sotype);
+}
+
void test_sockmap_ktls(void)
{
run_tests(AF_INET, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET, BPF_MAP_TYPE_SOCKHASH);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKMAP);
run_tests(AF_INET6, BPF_MAP_TYPE_SOCKHASH);
+ run_ktls_test(AF_INET, SOCK_STREAM);
+ run_ktls_test(AF_INET6, SOCK_STREAM);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index 4ee1148d22be..f1bdccc7e4e7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -924,6 +924,8 @@ static void redir_partial(int family, int sotype, int sock_map, int parser_map)
goto close;
n = xsend(c1, buf, sizeof(buf), 0);
+ if (n == -1)
+ goto close;
if (n < sizeof(buf))
FAIL("incomplete write");
@@ -1366,237 +1368,6 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
-static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
- int sock_mapfd, int nop_mapfd,
- int verd_mapfd, enum redir_mode mode,
- int send_flags)
-{
- const char *log_prefix = redir_mode_str(mode);
- unsigned int pass;
- int err, n;
- u32 key;
- char b;
-
- zero_verdict_count(verd_mapfd);
-
- err = add_to_sockmap(sock_mapfd, peer0, peer1);
- if (err)
- return;
-
- if (nop_mapfd >= 0) {
- err = add_to_sockmap(nop_mapfd, cli0, cli1);
- if (err)
- return;
- }
-
- /* Last byte is OOB data when send_flags has MSG_OOB bit set */
- n = xsend(cli1, "ab", 2, send_flags);
- if (n >= 0 && n < 2)
- FAIL("%s: incomplete send", log_prefix);
- if (n < 2)
- return;
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- return;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
- if (n < 0)
- FAIL_ERRNO("%s: recv_timeout", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
-
- if (send_flags & MSG_OOB) {
- /* Check that we can't read OOB while in sockmap */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EOPNOTSUPP)
- FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
- log_prefix, n, errno);
-
- /* Remove peer1 from sockmap */
- xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
-
- /* Check that OOB was dropped on redirect */
- errno = 0;
- n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
- if (n != -1 || errno != EINVAL)
- FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
- log_prefix, n, errno);
- }
-}
-
-static void unix_redir_to_connected(int sotype, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
- goto close0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int sotype)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_UNIX);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- unix_skb_redir_to_connected(skel, map, sotype);
-}
-
-/* Returns two connected loopback vsock sockets */
-static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
-{
- return create_pair(AF_VSOCK, sotype | SOCK_NONBLOCK, v0, v1);
-}
-
-static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
- enum redir_mode mode, int sotype)
-{
- const char *log_prefix = redir_mode_str(mode);
- char a = 'a', b = 'b';
- int u0, u1, v0, v1;
- int sfd[2];
- unsigned int pass;
- int err, n;
- u32 key;
-
- zero_verdict_count(verd_mapfd);
-
- if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
- return;
-
- u0 = sfd[0];
- u1 = sfd[1];
-
- err = vsock_socketpair_connectible(sotype, &v0, &v1);
- if (err) {
- FAIL("vsock_socketpair_connectible() failed");
- goto close_uds;
- }
-
- err = add_to_sockmap(sock_mapfd, u0, v0);
- if (err) {
- FAIL("add_to_sockmap failed");
- goto close_vsock;
- }
-
- n = write(v1, &a, sizeof(a));
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
- goto out;
-
- n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
- if (n < 0)
- FAIL("%s: recv() err, errno=%d", log_prefix, errno);
- if (n == 0)
- FAIL("%s: incomplete recv", log_prefix);
- if (b != a)
- FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
-
- key = SK_PASS;
- err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
- if (err)
- goto out;
- if (pass != 1)
- FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-out:
- key = 0;
- bpf_map_delete_elem(sock_mapfd, &key);
- key = 1;
- bpf_map_delete_elem(sock_mapfd, &key);
-
-close_vsock:
- close(v0);
- close(v1);
-
-close_uds:
- close(u0);
- close(u1);
-}
-
-static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map,
- int sotype)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
- skel->bss->test_ingress = true;
- vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(AF_VSOCK);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
- vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
-}
-
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1637,224 +1408,6 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
-static int inet_socketpair(int family, int type, int *s, int *c)
-{
- return create_pair(family, type | SOCK_NONBLOCK, s, c);
-}
-
-static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
- enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int err;
-
- err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
- if (err)
- return;
- err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
- if (err)
- goto close_cli0;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
- skel->bss->test_ingress = true;
- udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
- udp_skb_redir_to_connected(skel, map, family);
-}
-
-static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
- int verd_mapfd, enum redir_mode mode)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- return;
- c0 = sfd[0], p0 = sfd[1];
-
- err = inet_socketpair(family, type, &p1, &c1);
- if (err)
- goto close;
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
- mode, NO_FLAGS);
-
- xclose(c1);
- xclose(p1);
-close:
- xclose(c0);
- xclose(p0);
-}
-
-static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_EGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_EGRESS);
- skel->bss->test_ingress = true;
- inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
- REDIR_INGRESS);
- inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
- REDIR_INGRESS);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
- int nop_mapfd, int verd_mapfd,
- enum redir_mode mode, int send_flags)
-{
- int c0, c1, p0, p1;
- int sfd[2];
- int err;
-
- err = inet_socketpair(family, type, &p0, &c0);
- if (err)
- return;
-
- if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
- goto close_cli0;
- c1 = sfd[0], p1 = sfd[1];
-
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
- verd_mapfd, mode, send_flags);
-
- xclose(c1);
- xclose(p1);
-close_cli0:
- xclose(c0);
- xclose(p0);
-}
-
-static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
- struct bpf_map *inner_map, int family)
-{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int nop_map = bpf_map__fd(skel->maps.nop_map);
- int verdict_map = bpf_map__fd(skel->maps.verdict_map);
- int sock_map = bpf_map__fd(inner_map);
- int err;
-
- err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
- if (err)
- return;
-
- skel->bss->test_ingress = false;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_EGRESS, MSG_OOB);
-
- skel->bss->test_ingress = true;
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, -1, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- unix_inet_redir_to_connected(family, SOCK_DGRAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, NO_FLAGS);
-
- /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
- unix_inet_redir_to_connected(family, SOCK_STREAM,
- sock_map, nop_map, verdict_map,
- REDIR_INGRESS, MSG_OOB);
-
- xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
-}
-
-static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
- int family)
-{
- const char *family_name, *map_name;
- struct netns_obj *netns;
- char s[MAX_TEST_NAME];
-
- family_name = family_str(family);
- map_name = map_type_str(map);
- snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
- if (!test__start_subtest(s))
- return;
-
- netns = netns_new("sockmap_listen", true);
- if (!ASSERT_OK_PTR(netns, "netns_new"))
- return;
-
- inet_unix_skb_redir_to_connected(skel, map, family);
- unix_inet_skb_redir_to_connected(skel, map, family);
-
- netns_free(netns);
-}
-
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1863,8 +1416,6 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
- test_udp_redir(skel, map, family);
- test_udp_unix_redir(skel, map, family);
}
void serial_test_sockmap_listen(void)
@@ -1880,16 +1431,10 @@ void serial_test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
- test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
- test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
new file mode 100644
index 000000000000..9c461d93113d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_redir.c
@@ -0,0 +1,465 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for sockmap/sockhash redirection.
+ *
+ * BPF_MAP_TYPE_SOCKMAP
+ * BPF_MAP_TYPE_SOCKHASH
+ * x
+ * sk_msg-to-egress
+ * sk_msg-to-ingress
+ * sk_skb-to-egress
+ * sk_skb-to-ingress
+ * x
+ * AF_INET, SOCK_STREAM
+ * AF_INET6, SOCK_STREAM
+ * AF_INET, SOCK_DGRAM
+ * AF_INET6, SOCK_DGRAM
+ * AF_UNIX, SOCK_STREAM
+ * AF_UNIX, SOCK_DGRAM
+ * AF_VSOCK, SOCK_STREAM
+ * AF_VSOCK, SOCK_SEQPACKET
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <sched.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <linux/string.h>
+#include <linux/vm_sockets.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "linux/const.h"
+#include "test_progs.h"
+#include "sockmap_helpers.h"
+#include "test_sockmap_redir.skel.h"
+
+/* The meaning of SUPPORTED is "will redirect packet as expected".
+ */
+#define SUPPORTED _BITUL(0)
+
+/* Note on sk_skb-to-ingress -> af_vsock:
+ *
+ * Peer socket may receive the packet some time after the return from sendmsg().
+ * In a typical usage scenario, recvmsg() will block until the redirected packet
+ * appears in the destination queue, or time out if the packet was dropped. By
+ * that point, the verdict map has already been updated to reflect what has
+ * happened.
+ *
+ * But sk_skb-to-ingress/af_vsock is an unsupported combination, so no recvmsg()
+ * takes place, which means we may race the execution of the verdict logic and
+ * read map_verd before it has been updated, i.e. we might observe
+ * map_verd[SK_DROP]=0 instead of map_verd[SK_DROP]=1.
+ *
+ * This confuses the selftest logic: if no packet was dropped, where's the
+ * packet? So here's a heuristic: on map_verd[SK_DROP]=map_verd[SK_PASS]=0
+ * (which implies the verdict program has not been run), just re-read the
+ * verdict map.
+ */
+#define UNSUPPORTED_RACY_VERD _BITUL(1)
+
+enum prog_type {
+ SK_MSG_EGRESS,
+ SK_MSG_INGRESS,
+ SK_SKB_EGRESS,
+ SK_SKB_INGRESS,
+};
+
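+/* Which end of a connected socket pair to send on / receive from:
+ * *_INNER picks the socket inserted into the map, *_OUTER its peer.
+ */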
+enum {
+ SEND_INNER = 0,
+ SEND_OUTER,
+};
+
+enum {
+ RECV_INNER = 0,
+ RECV_OUTER,
+};
+
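+/* maps.in is the map the verdict program is attached to and the one the
+ * source socket is inserted into; maps.out holds the redirect target the
+ * BPF program looks up (sock_map or sock_hash, per redirect_type).
+ */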
+struct maps {
+ int in;
+ int out;
+ int verd;
+};
+
+struct combo_spec {
+ enum prog_type prog_type;
+ const char *in, *out;
+};
+
+struct redir_spec {
+ const char *name;
+ int idx_send;
+ int idx_recv;
+ enum prog_type prog_type;
+};
+
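+/* in[] and out[] each hold a connected socket pair created by
+ * create_socket_pairs(); in[0] and out[0] are the ends inserted into the
+ * BPF maps, in[1] and out[1] are their peers.
+ */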
+struct socket_spec {
+ int family;
+ int sotype;
+ int send_flags;
+ int in[2];
+ int out[2];
+};
+
+static int socket_spec_pairs(struct socket_spec *s)
+{
+ return create_socket_pairs(s->family, s->sotype,
+ &s->in[0], &s->out[0],
+ &s->in[1], &s->out[1]);
+}
+
+static void socket_spec_close(struct socket_spec *s)
+{
+ xclose(s->in[0]);
+ xclose(s->in[1]);
+ xclose(s->out[0]);
+ xclose(s->out[1]);
+}
+
+static void get_redir_params(struct redir_spec *redir,
+ struct test_sockmap_redir *skel, int *prog_fd,
+ enum bpf_attach_type *attach_type,
+ int *redirect_flags)
+{
+ enum prog_type type = redir->prog_type;
+ struct bpf_program *prog;
+ bool sk_msg;
+
+ sk_msg = type == SK_MSG_INGRESS || type == SK_MSG_EGRESS;
+ prog = sk_msg ? skel->progs.prog_msg_verdict : skel->progs.prog_skb_verdict;
+
+ *prog_fd = bpf_program__fd(prog);
+ *attach_type = sk_msg ? BPF_SK_MSG_VERDICT : BPF_SK_SKB_VERDICT;
+
+ if (type == SK_MSG_INGRESS || type == SK_SKB_INGRESS)
+ *redirect_flags = BPF_F_INGRESS;
+ else
+ *redirect_flags = 0;
+}
+
+static void try_recv(const char *prefix, int fd, int flags, bool expect_success)
+{
+ ssize_t n;
+ char buf;
+
+ errno = 0;
+ n = recv(fd, &buf, 1, flags);
+ if (n < 0 && expect_success)
+ FAIL_ERRNO("%s: unexpected failure: retval=%zd", prefix, n);
+ if (!n && !expect_success)
+ FAIL("%s: expected failure: retval=%zd", prefix, n);
+}
+
+static void handle_unsupported(int sd_send, int sd_peer, int sd_in, int sd_out,
+ int sd_recv, int map_verd, int status)
+{
+ unsigned int drop, pass;
+ char recv_buf;
+ ssize_t n;
+
+get_verdict:
+ if (xbpf_map_lookup_elem(map_verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(map_verd, &u32(SK_PASS), &pass))
+ return;
+
+ if (pass == 0 && drop == 0 && (status & UNSUPPORTED_RACY_VERD)) {
+ sched_yield();
+ goto get_verdict;
+ }
+
+ if (pass != 0) {
+ FAIL("unsupported: wanted verdict pass 0, have %u", pass);
+ return;
+ }
+
+ /* If nothing was dropped, packet should have reached the peer */
+ if (drop == 0) {
+ errno = 0;
+ n = recv_timeout(sd_peer, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1)
+ FAIL_ERRNO("unsupported: packet missing, retval=%zd", n);
+ }
+
+ /* Ensure queues are empty */
+ try_recv("bpf.recv(sd_send)", sd_send, MSG_DONTWAIT, false);
+ if (sd_in != sd_send)
+ try_recv("bpf.recv(sd_in)", sd_in, MSG_DONTWAIT, false);
+
+ try_recv("bpf.recv(sd_out)", sd_out, MSG_DONTWAIT, false);
+ if (sd_recv != sd_out)
+ try_recv("bpf.recv(sd_recv)", sd_recv, MSG_DONTWAIT, false);
+}
+
+static void test_send_redir_recv(int sd_send, int send_flags, int sd_peer,
+ int sd_in, int sd_out, int sd_recv,
+ struct maps *maps, int status)
+{
+ unsigned int drop, pass;
+ char *send_buf = "ab";
+ char recv_buf = '\0';
+ ssize_t n, len = 1;
+
+ /* Zero out the verdict map */
+ if (xbpf_map_update_elem(maps->verd, &u32(SK_DROP), &u32(0), BPF_ANY) ||
+ xbpf_map_update_elem(maps->verd, &u32(SK_PASS), &u32(0), BPF_ANY))
+ return;
+
+ if (xbpf_map_update_elem(maps->in, &u32(0), &u64(sd_in), BPF_NOEXIST))
+ return;
+
+ if (xbpf_map_update_elem(maps->out, &u32(0), &u64(sd_out), BPF_NOEXIST))
+ goto del_in;
+
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ if (send_flags & MSG_OOB)
+ len++;
+ n = send(sd_send, send_buf, len, send_flags);
+ if (n >= 0 && n < len)
+ FAIL("incomplete send");
+ if (n < 0) {
+ /* sk_msg redirect combo not supported? */
+ if (status & SUPPORTED || errno != EACCES)
+ FAIL_ERRNO("send");
+ goto out;
+ }
+
+ if (!(status & SUPPORTED)) {
+ handle_unsupported(sd_send, sd_peer, sd_in, sd_out, sd_recv,
+ maps->verd, status);
+ goto out;
+ }
+
+ errno = 0;
+ n = recv_timeout(sd_recv, &recv_buf, 1, 0, IO_TIMEOUT_SEC);
+ if (n != 1) {
+ FAIL_ERRNO("recv_timeout()");
+ goto out;
+ }
+
+ /* Check verdict _after_ recv(); af_vsock may need time to catch up */
+ if (xbpf_map_lookup_elem(maps->verd, &u32(SK_DROP), &drop) ||
+ xbpf_map_lookup_elem(maps->verd, &u32(SK_PASS), &pass))
+ goto out;
+
+ if (drop != 0 || pass != 1)
+ FAIL("unexpected verdict drop/pass: wanted 0/1, have %u/%u",
+ drop, pass);
+
+ if (recv_buf != send_buf[0])
+ FAIL("recv(): payload check, %02x != %02x", recv_buf, send_buf[0]);
+
+ if (send_flags & MSG_OOB) {
+ /* Fail reading OOB while in sockmap */
+ try_recv("bpf.recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ /* Remove sd_out from sockmap */
+ xbpf_map_delete_elem(maps->out, &u32(0));
+
+ /* Check that OOB was dropped on redirect */
+ try_recv("recv(sd_out, MSG_OOB)", sd_out,
+ MSG_OOB | MSG_DONTWAIT, false);
+
+ goto del_in;
+ }
+out:
+ xbpf_map_delete_elem(maps->out, &u32(0));
+del_in:
+ xbpf_map_delete_elem(maps->in, &u32(0));
+}
+
+static int is_redir_supported(enum prog_type type, const char *in,
+ const char *out)
+{
+ /* Matching based on strings returned by socket_kind_to_str():
+ * tcp4, udp4, tcp6, udp6, u_str, u_dgr, v_str, v_seq
+ * Plus a wildcard: any
+ * Not in use: u_seq, v_dgr
+ */
+ struct combo_spec *c, combos[] = {
+ /* Send to local: TCP -> any, but vsock */
+ { SK_MSG_INGRESS, "tcp", "tcp" },
+ { SK_MSG_INGRESS, "tcp", "udp" },
+ { SK_MSG_INGRESS, "tcp", "u_str" },
+ { SK_MSG_INGRESS, "tcp", "u_dgr" },
+
+ /* Send to egress: TCP -> TCP */
+ { SK_MSG_EGRESS, "tcp", "tcp" },
+
+ /* Ingress to egress: any -> any */
+ { SK_SKB_EGRESS, "any", "any" },
+
+ /* Ingress to local: any -> any, but vsock */
+ { SK_SKB_INGRESS, "any", "tcp" },
+ { SK_SKB_INGRESS, "any", "udp" },
+ { SK_SKB_INGRESS, "any", "u_str" },
+ { SK_SKB_INGRESS, "any", "u_dgr" },
+ };
+
+ for (c = combos; c < combos + ARRAY_SIZE(combos); c++) {
+ if (c->prog_type == type &&
+ (!strcmp(c->in, "any") || strstarts(in, c->in)) &&
+ (!strcmp(c->out, "any") || strstarts(out, c->out)))
+ return SUPPORTED;
+ }
+
+ return 0;
+}
+
+static int get_support_status(enum prog_type type, const char *in,
+ const char *out)
+{
+ int status = is_redir_supported(type, in, out);
+
+ if (type == SK_SKB_INGRESS && strstarts(out, "v_"))
+ status |= UNSUPPORTED_RACY_VERD;
+
+ return status;
+}
+
+static void test_socket(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps, struct socket_spec *s_in,
+ struct socket_spec *s_out)
+{
+ int fd_in, fd_out, fd_send, fd_peer, fd_recv, flags, status;
+ const char *in_str, *out_str;
+ char s[MAX_TEST_NAME];
+
+ fd_in = s_in->in[0];
+ fd_out = s_out->out[0];
+ fd_send = s_in->in[redir->idx_send];
+ fd_peer = s_in->in[redir->idx_send ^ 1];
+ fd_recv = s_out->out[redir->idx_recv];
+ flags = s_in->send_flags;
+
+ in_str = socket_kind_to_str(fd_in);
+ out_str = socket_kind_to_str(fd_out);
+ status = get_support_status(redir->prog_type, in_str, out_str);
+
+ snprintf(s, sizeof(s),
+ "%-4s %-17s %-5s %s %-5s%6s",
+ /* hash sk_skb-to-ingress u_str → v_str (OOB) */
+ type == BPF_MAP_TYPE_SOCKMAP ? "map" : "hash",
+ redir->name,
+ in_str,
+ status & SUPPORTED ? "→" : " ",
+ out_str,
+ (flags & MSG_OOB) ? "(OOB)" : "");
+
+ if (!test__start_subtest(s))
+ return;
+
+ test_send_redir_recv(fd_send, flags, fd_peer, fd_in, fd_out, fd_recv,
+ maps, status);
+}
+
+static void test_redir(enum bpf_map_type type, struct redir_spec *redir,
+ struct maps *maps)
+{
+ struct socket_spec *s, sockets[] = {
+ { AF_INET, SOCK_STREAM },
+ // { AF_INET, SOCK_STREAM, MSG_OOB }, /* Known to be broken */
+ { AF_INET6, SOCK_STREAM },
+ { AF_INET, SOCK_DGRAM },
+ { AF_INET6, SOCK_DGRAM },
+ { AF_UNIX, SOCK_STREAM },
+ { AF_UNIX, SOCK_STREAM, MSG_OOB },
+ { AF_UNIX, SOCK_DGRAM },
+ // { AF_UNIX, SOCK_SEQPACKET}, /* Unsupported BPF_MAP_UPDATE_ELEM */
+ { AF_VSOCK, SOCK_STREAM },
+ // { AF_VSOCK, SOCK_DGRAM }, /* Unsupported socket() */
+ { AF_VSOCK, SOCK_SEQPACKET },
+ };
+
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ if (socket_spec_pairs(s))
+ goto out;
+
+ /* Intra-proto */
+ for (s = sockets; s < sockets + ARRAY_SIZE(sockets); s++)
+ test_socket(type, redir, maps, s, s);
+
+ /* Cross-proto */
+ for (int i = 0; i < ARRAY_SIZE(sockets); i++) {
+ for (int j = 0; j < ARRAY_SIZE(sockets); j++) {
+ struct socket_spec *out = &sockets[j];
+ struct socket_spec *in = &sockets[i];
+
+ /* Skip intra-proto and between variants */
+ if (out->send_flags ||
+ (in->family == out->family &&
+ in->sotype == out->sotype))
+ continue;
+
+ test_socket(type, redir, maps, in, out);
+ }
+ }
+out:
+ while (--s >= sockets)
+ socket_spec_close(s);
+}
+
+static void test_map(enum bpf_map_type type)
+{
+ struct redir_spec *r, redirs[] = {
+ { "sk_msg-to-ingress", SEND_INNER, RECV_INNER, SK_MSG_INGRESS },
+ { "sk_msg-to-egress", SEND_INNER, RECV_OUTER, SK_MSG_EGRESS },
+ { "sk_skb-to-egress", SEND_OUTER, RECV_OUTER, SK_SKB_EGRESS },
+ { "sk_skb-to-ingress", SEND_OUTER, RECV_INNER, SK_SKB_INGRESS },
+ };
+
+ for (r = redirs; r < redirs + ARRAY_SIZE(redirs); r++) {
+ enum bpf_attach_type attach_type;
+ struct test_sockmap_redir *skel;
+ struct maps maps;
+ int prog_fd;
+
+ skel = test_sockmap_redir__open_and_load();
+ if (!skel) {
+ FAIL("open_and_load");
+ return;
+ }
+
+ switch (type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ maps.in = bpf_map__fd(skel->maps.nop_map);
+ maps.out = bpf_map__fd(skel->maps.sock_map);
+ break;
+ case BPF_MAP_TYPE_SOCKHASH:
+ maps.in = bpf_map__fd(skel->maps.nop_hash);
+ maps.out = bpf_map__fd(skel->maps.sock_hash);
+ break;
+ default:
+ FAIL("Unsupported bpf_map_type");
+ return;
+ }
+
+ skel->bss->redirect_type = type;
+ maps.verd = bpf_map__fd(skel->maps.verdict_map);
+ get_redir_params(r, skel, &prog_fd, &attach_type,
+ &skel->bss->redirect_flags);
+
+ if (xbpf_prog_attach(prog_fd, maps.in, attach_type, 0))
+ return;
+
+ test_redir(type, r, &maps);
+
+ if (xbpf_prog_detach2(prog_fd, maps.in, attach_type))
+ return;
+
+ test_sockmap_redir__destroy(skel);
+ }
+}
+
+void serial_test_sockmap_redir(void)
+{
+ test_map(BPF_MAP_TYPE_SOCKMAP);
+ test_map(BPF_MAP_TYPE_SOCKHASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
new file mode 100644
index 000000000000..621b3b71888e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_strp.c
@@ -0,0 +1,454 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <netinet/tcp.h>
+#include <test_progs.h>
+#include "sockmap_helpers.h"
+#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_strp.skel.h"
+
+#define STRP_PKT_HEAD_LEN 4
+#define STRP_PKT_BODY_LEN 6
+#define STRP_PKT_FULL_LEN (STRP_PKT_HEAD_LEN + STRP_PKT_BODY_LEN)
+
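+/* A test packet: a STRP_PKT_HEAD_LEN-byte header ("head") followed by a
+ * STRP_PKT_BODY_LEN-byte body ("+body" plus the trailing NUL).
+ */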
+static const char packet[STRP_PKT_FULL_LEN] = "head+body\0";
+static const int test_packet_num = 100;
+
+/* Current implementation of tcp_bpf_recvmsg_parser() invokes data_ready
+ * with sk held if an skb exists in sk_receive_queue. Then for the
+ * data_ready implementation of strparser, it will delay the read
+ * operation if sk is held and EAGAIN is returned.
+ */
+static int sockmap_strp_consume_pre_data(int p)
+{
+ int recvd;
+ bool retried = false;
+ char rcv[10];
+
+retry:
+ errno = 0;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), 0, 1);
+ if (recvd < 0 && errno == EAGAIN && retried == false) {
+ /* On the first call, EAGAIN will certainly be returned.
+ * A 1-second wait is enough for the workqueue to finish.
+ */
+ sleep(1);
+ retried = true;
+ goto retry;
+ }
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv error or truncated data") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "data mismatch"))
+ return -1;
+ return 0;
+}
+
+static struct test_sockmap_strp *sockmap_strp_init(int *out_map, bool pass,
+ bool need_parser)
+{
+ struct test_sockmap_strp *strp = NULL;
+ int verdict, parser;
+ int err;
+
+	strp = test_sockmap_strp__open_and_load();
+	if (!ASSERT_OK_PTR(strp, "test_sockmap_strp__open_and_load"))
+		return NULL;
+	*out_map = bpf_map__fd(strp->maps.sock_map);
+
+ if (need_parser)
+ parser = bpf_program__fd(strp->progs.prog_skb_parser_partial);
+ else
+ parser = bpf_program__fd(strp->progs.prog_skb_parser);
+
+ if (pass)
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict_pass);
+ else
+ verdict = bpf_program__fd(strp->progs.prog_skb_verdict);
+
+ err = bpf_prog_attach(parser, *out_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream parser"))
+ goto err;
+
+ err = bpf_prog_attach(verdict, *out_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach stream verdict"))
+ goto err;
+
+ return strp;
+err:
+ test_sockmap_strp__destroy(strp);
+ return NULL;
+}
+
+/* Dispatch packets to different sockets by packet size:
+ *
+ * ------ ------
+ * | pkt4 || pkt1 |... > remote socket
+ * ------ ------ / ------ ------
+ * | pkt8 | pkt7 |...
+ * ------ ------ \ ------ ------
+ * | pkt3 || pkt2 |... > local socket
+ * ------ ------
+ */
+static void test_sockmap_strp_dispatch_pkt(int family, int sotype)
+{
+ int i, j, zero = 0, one = 1, recvd;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ int test_cnt = 6;
+ char rcv[10];
+ struct {
+ char data[7];
+ int data_len;
+ int send_cnt;
+ int *receiver;
+ } send_dir[2] = {
+ /* data expected to deliver to local */
+ {"llllll", 6, 0, &p0},
+ /* data expected to deliver to remote */
+ {"rrrrr", 5, 0, &c1}
+ };
+
+ strp = sockmap_strp_init(&map, false, false);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ err = setsockopt(c1, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ if (!ASSERT_OK(err, "setsockopt(TCP_NODELAY)"))
+ goto out_close;
+
+ /* deliver data with data size greater than 5 to local */
+ strp->data->verdict_max_size = 5;
+
+ for (i = 0; i < test_cnt; i++) {
+ int d = i % 2;
+
+ xsend(c0, send_dir[d].data, send_dir[d].data_len, 0);
+ send_dir[d].send_cnt++;
+ }
+
+ for (i = 0; i < 2; i++) {
+ for (j = 0; j < send_dir[i].send_cnt; j++) {
+ int expected = send_dir[i].data_len;
+
+ recvd = recv_timeout(*send_dir[i].receiver, rcv,
+ expected, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, expected, "recv_timeout()"))
+ goto out_close;
+ if (!ASSERT_OK(memcmp(send_dir[i].data, rcv, recvd),
+ "data mismatch"))
+ goto out_close;
+ }
+ }
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* We have multiple packets in one skb
+ * ------------ ------------ ------------
+ * | packet1 | packet2 | ...
+ * ------------ ------------ ------------
+ */
+static void test_sockmap_strp_multiple_pkt(int family, int sotype)
+{
+ int i, zero = 0;
+ int sent, recvd, total;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char *snd = NULL, *rcv = NULL;
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* construct multiple packets in one buffer */
+ total = test_packet_num * STRP_PKT_FULL_LEN;
+ snd = malloc(total);
+ rcv = malloc(total + 1);
+ if (!ASSERT_TRUE(snd, "malloc(snd)") ||
+ !ASSERT_TRUE(rcv, "malloc(rcv)"))
+ goto out_close;
+
+ for (i = 0; i < test_packet_num; i++) {
+ memcpy(snd + i * STRP_PKT_FULL_LEN,
+ packet, STRP_PKT_FULL_LEN);
+ }
+
+ sent = xsend(c, snd, total, 0);
+ if (!ASSERT_EQ(sent, total, "xsend(c)"))
+ goto out_close;
+
+	/* try to recv one more byte so any extra data would be detected */
+ recvd = recv_timeout(p, rcv, total + 1, MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, total, "recv(rcv)"))
+ goto out_close;
+
+	/* we sent one TCP stream carrying multiple packets;
+	 * check whether they were handled correctly
+	 */
+ if (!ASSERT_OK(memcmp(snd, rcv, total), "data mismatch"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+ if (snd)
+ free(snd);
+ if (rcv)
+ free(rcv);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with partial read */
+static void test_sockmap_strp_partial_read(int family, int sotype)
+{
+ int zero = 0, recvd, off;
+ int err, map;
+ int c = -1, p = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(zero, p)"))
+ goto out_close;
+
+ /* 1.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial head sent, expected no data"))
+ goto out_close;
+
+ /* 1.2 send remaining head and body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+ /* 2.1 send partial head, 1 byte header left */
+ off = STRP_PKT_HEAD_LEN - 1;
+ xsend(c, packet, off, 0);
+
+ /* 2.2 send remaining head and partial body, 1 byte body left */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off - 1, 0);
+ off = STRP_PKT_FULL_LEN - 1;
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, 1);
+ if (!ASSERT_EQ(-1, recvd, "partial body sent, expected no data"))
+ goto out_close;
+
+ /* 2.3 send remaining body */
+ xsend(c, packet + off, STRP_PKT_FULL_LEN - off, 0);
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT, IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "expected full data"))
+ goto out_close;
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test simple socket read/write with strparser + FIONREAD */
+static void test_sockmap_strp_pass(int family, int sotype, bool fionread)
+{
+ int zero = 0, pkt_size = STRP_PKT_FULL_LEN, sent, recvd, avail;
+ int err, map;
+ int c = -1, p = -1;
+ int test_cnt = 10, i;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, true, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ err = create_pair(family, sotype, &c, &p);
+ if (err)
+ goto out;
+
+	/* inject some data before the socket joins the sockmap; it should
+	 * be read correctly because we check sk_receive_queue in
+	 * tcp_bpf_recvmsg_parser().
+	 */
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(pre-data)"))
+ goto out_close;
+
+ /* sk_data_ready of 'p' will be replaced by strparser handler */
+ err = bpf_map_update_elem(map, &zero, &p, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p)"))
+ goto out_close;
+
+ /* consume previous data we injected */
+ if (sockmap_strp_consume_pre_data(p))
+ goto out_close;
+
+ /* Previously, we encountered issues such as deadlocks and
+ * sequence errors that resulted in the inability to read
+ * continuously. Therefore, we perform multiple iterations
+ * of testing here.
+ */
+ for (i = 0; i < test_cnt; i++) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "xsend(c)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "memcmp, data mismatch"))
+ goto out_close;
+ }
+
+ if (fionread) {
+ sent = xsend(c, packet, pkt_size, 0);
+ if (!ASSERT_EQ(sent, pkt_size, "second xsend(c)"))
+ goto out_close;
+
+ err = ioctl(p, FIONREAD, &avail);
+ if (!ASSERT_OK(err, "ioctl(FIONREAD) error") ||
+ !ASSERT_EQ(avail, pkt_size, "ioctl(FIONREAD)"))
+ goto out_close;
+
+ recvd = recv_timeout(p, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, pkt_size, "second recv_timeout(p)") ||
+ !ASSERT_OK(memcmp(packet, rcv, pkt_size),
+ "second memcmp, data mismatch"))
+ goto out_close;
+ }
+
+out_close:
+ close(c);
+ close(p);
+
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+/* Test strparser with verdict mode */
+static void test_sockmap_strp_verdict(int family, int sotype)
+{
+ int zero = 0, one = 1, sent, recvd, off;
+ int err, map;
+ int c0 = -1, p0 = -1, c1 = -1, p1 = -1;
+ struct test_sockmap_strp *strp = NULL;
+ char rcv[STRP_PKT_FULL_LEN + 1] = "0";
+
+ strp = sockmap_strp_init(&map, false, true);
+ if (!ASSERT_TRUE(strp, "sockmap_strp_init"))
+ return;
+
+ /* We simulate a reverse proxy server.
+ * When p0 receives data from c0, we forward it to c1.
+ * From c1's perspective, it will consider this data
+ * as being sent by p1.
+ */
+ err = create_socket_pairs(family, sotype, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs()"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &p0, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p0)"))
+ goto out_close;
+
+ err = bpf_map_update_elem(map, &one, &p1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(p1)"))
+ goto out_close;
+
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "xsend(c0)"))
+ goto out_close;
+
+ recvd = recv_timeout(c1, rcv, sizeof(rcv), MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "received data does not match the sent data"))
+ goto out_close;
+
+ /* send again to ensure the stream is functioning correctly. */
+ sent = xsend(c0, packet, STRP_PKT_FULL_LEN, 0);
+ if (!ASSERT_EQ(sent, STRP_PKT_FULL_LEN, "second xsend(c0)"))
+ goto out_close;
+
+ /* partial read */
+ off = STRP_PKT_FULL_LEN / 2;
+ recvd = recv_timeout(c1, rcv, off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+ recvd += recv_timeout(c1, rcv + off, sizeof(rcv) - off, MSG_DONTWAIT,
+ IO_TIMEOUT_SEC);
+
+ if (!ASSERT_EQ(recvd, STRP_PKT_FULL_LEN, "partial recv_timeout(c1)") ||
+ !ASSERT_OK(memcmp(packet, rcv, STRP_PKT_FULL_LEN),
+ "partial received data does not match the sent data"))
+ goto out_close;
+
+out_close:
+ close(c0);
+ close(c1);
+ close(p0);
+ close(p1);
+out:
+ test_sockmap_strp__destroy(strp);
+}
+
+void test_sockmap_strp(void)
+{
+ if (test__start_subtest("sockmap strp tcp pass"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp v6 pass"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, false);
+ if (test__start_subtest("sockmap strp tcp pass fionread"))
+ test_sockmap_strp_pass(AF_INET, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp v6 pass fionread"))
+ test_sockmap_strp_pass(AF_INET6, SOCK_STREAM, true);
+ if (test__start_subtest("sockmap strp tcp verdict"))
+ test_sockmap_strp_verdict(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp v6 verdict"))
+ test_sockmap_strp_verdict(AF_INET6, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp partial read"))
+ test_sockmap_strp_partial_read(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp multiple packets"))
+ test_sockmap_strp_multiple_pkt(AF_INET, SOCK_STREAM);
+ if (test__start_subtest("sockmap strp tcp dispatch"))
+ test_sockmap_strp_dispatch_pkt(AF_INET, SOCK_STREAM);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 05d0e07da394..ba6b3ec1156a 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -2,7 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
-#include <linux/tcp.h>
+#include <netinet/tcp.h>
#include <linux/netlink.h>
#include "sockopt_sk.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
index 2b0068742ef9..254fbfeab06a 100644
--- a/tools/testing/selftests/bpf/prog_tests/spin_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -13,22 +13,22 @@ static struct {
const char *err_msg;
} spin_lock_fail_tests[] = {
{ "lock_id_kptr_preserve",
- "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
- "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "5: (bf) r1 = r0 ; R0=ptr_foo(id=2,ref_obj_id=2) "
+ "R1=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=ptr_ expected=percpu_ptr_" },
{ "lock_id_global_zero",
- "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "; R1=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
- " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
- " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ " R0=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1=map_value(id=1,map=array_map,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_innermapval_preserve",
"[0-9]\\+: (bf) r1 = r0 ;"
" R0=map_value(id=2,ks=4,vs=8)"
- " R1_w=map_value(id=2,ks=4,vs=8)\n"
+ " R1=map_value(id=2,ks=4,vs=8)\n"
"[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
"R1 type=map_value expected=percpu_ptr_" },
{ "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
@@ -50,6 +50,9 @@ static struct {
{ "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
{ "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
{ "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_helper_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_kfunc_subprog", "global function calls are not allowed while holding a lock" },
+ { "lock_global_sleepable_subprog_indirect", "global function calls are not allowed while holding a lock" },
};
static int match_regex(const char *pattern, const char *string)
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index b7ba5cd47d96..271b5cc9fc01 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -39,7 +39,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 0832fd787457..b277dddd5af7 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -66,7 +66,7 @@ retry:
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
new file mode 100644
index 000000000000..c9efdd2a5b18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_ips.skel.h"
+
+#ifdef __x86_64__
+static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...)
+{
+ __u64 ips[PERF_MAX_STACK_DEPTH];
+ struct ksyms *ksyms = NULL;
+ int i, err = 0;
+ va_list args;
+
+ /* sorted by addr */
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return -1;
+
+ /* unlikely, but... */
+ if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max"))
+ return -1;
+
+ err = bpf_map_lookup_elem(fd, &key, ips);
+ if (err)
+ goto out;
+
+ /*
+ * Compare all symbols provided via arguments with stacktrace ips,
+	 * and their related symbol addresses.
+ */
+ va_start(args, cnt);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned long val;
+ struct ksym *ksym;
+
+ val = va_arg(args, unsigned long);
+ ksym = ksym_search_local(ksyms, ips[i]);
+ if (!ASSERT_OK_PTR(ksym, "ksym_search_local"))
+ break;
+ ASSERT_EQ(ksym->addr, val, "stack_cmp");
+ }
+
+ va_end(args);
+
+out:
+ free_kallsyms_local(ksyms);
+ return err;
+}
+
+static void test_stacktrace_ips_kprobe_multi(bool retprobe)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts,
+ .retprobe = retprobe
+ );
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct stacktrace_ips *skel;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.kprobe_multi_test,
+ "bpf_testmod_stacktrace_test", &opts);
+ if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
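+	/* Expect the frames of the testmod stacktrace call chain, innermost
+	 * function first, with bpf_testmod_test_read as the outermost caller.
+	 */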
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4,
+ ksym_get_addr("bpf_testmod_stacktrace_test_3"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_2"),
+ ksym_get_addr("bpf_testmod_stacktrace_test_1"),
+ ksym_get_addr("bpf_testmod_test_read"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void test_stacktrace_ips_raw_tp(void)
+{
+ __u32 info_len = sizeof(struct bpf_prog_info);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_info info = {};
+ struct stacktrace_ips *skel;
+ __u64 bpf_prog_ksym = 0;
+ int err;
+
+ skel = stacktrace_ips__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load"))
+ return;
+
+ if (!skel->kconfig->CONFIG_UNWINDER_ORC) {
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.rawtp_test = bpf_program__attach_raw_tracepoint(
+ skel->progs.rawtp_test,
+ "bpf_testmod_test_read");
+ if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint"))
+ goto cleanup;
+
+ /* get bpf program address */
+ info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym);
+ info.nr_jited_ksyms = 1;
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test),
+ &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ trigger_module_test_read(1);
+
+ load_kallsyms();
+
+ check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2,
+ bpf_prog_ksym,
+ ksym_get_addr("bpf_trace_run2"));
+
+cleanup:
+ stacktrace_ips__destroy(skel);
+}
+
+static void __test_stacktrace_ips(void)
+{
+ if (test__start_subtest("kprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(false);
+ if (test__start_subtest("kretprobe_multi"))
+ test_stacktrace_ips_kprobe_multi(true);
+ if (test__start_subtest("raw_tp"))
+ test_stacktrace_ips_raw_tp();
+}
+#else
+static void __test_stacktrace_ips(void)
+{
+ test__skip();
+}
+#endif
+
+void test_stacktrace_ips(void)
+{
+ __test_stacktrace_ips();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index df59e4ae2951..c23b97414813 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -1,46 +1,27 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "stacktrace_map.skel.h"
void test_stacktrace_map(void)
{
+ struct stacktrace_map *skel;
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "oncpu";
- int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.bpf.o";
- __u32 key, val, duration = 0;
- struct bpf_program *prog;
- struct bpf_object *obj;
- struct bpf_link *link;
+ int err, stack_trace_len;
+ __u32 key, val, stack_id, duration = 0;
+ __u64 stack[PERF_MAX_STACK_DEPTH];
- err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ skel = stacktrace_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- prog = bpf_object__find_program_by_name(obj, prog_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
- goto close_prog;
-
- link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (!ASSERT_OK_PTR(link, "attach_tp"))
- goto close_prog;
-
- /* find map fds */
- control_map_fd = bpf_find_map(__func__, obj, "control_map");
- if (CHECK_FAIL(control_map_fd < 0))
- goto disable_pmu;
-
- stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
- if (CHECK_FAIL(stackid_hmap_fd < 0))
- goto disable_pmu;
-
- stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
- if (CHECK_FAIL(stackmap_fd < 0))
- goto disable_pmu;
-
- stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
- if (CHECK_FAIL(stack_amap_fd < 0))
- goto disable_pmu;
+ control_map_fd = bpf_map__fd(skel->maps.control_map);
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ err = stacktrace_map__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
/* give some time for bpf program run */
sleep(1);
@@ -50,26 +31,32 @@ void test_stacktrace_map(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
+ goto out;
stack_trace_len = PERF_MAX_STACK_DEPTH * sizeof(__u64);
err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
if (CHECK(err, "compare_stack_ips stackmap vs. stack_amap",
"err %d errno %d\n", err, errno))
- goto disable_pmu;
-
-disable_pmu:
- bpf_link__destroy(link);
-close_prog:
- bpf_object__close(obj);
+ goto out;
+
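+	/* The BPF program saved one stack id in BSS; lookup-and-delete that
+	 * entry from stackmap, then verify a second lookup fails with -ENOENT.
+	 */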
+ stack_id = skel->bss->stack_id;
+ err = bpf_map_lookup_and_delete_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_OK(err, "lookup and delete target stack_id"))
+ goto out;
+
+ err = bpf_map_lookup_elem(stackmap_fd, &stack_id, stack);
+ if (!ASSERT_EQ(err, -ENOENT, "lookup deleted stack_id"))
+ goto out;
+out:
+ stacktrace_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index c6ef06f55cdb..e985d51d3d47 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void)
{
const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.bpf.o";
+ const char *file = "./stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
@@ -46,7 +46,7 @@ void test_stacktrace_map_raw_tp(void)
bpf_map_update_elem(control_map_fd, &key, &val, 0);
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
index 1932b1e0685c..dc2ccf6a14d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -40,7 +40,7 @@ void test_stacktrace_map_skip(void)
skel->bss->control = 1;
/* for every element in stackid_hmap, we can find a corresponding one
- * in stackmap, and vise versa.
+ * in stackmap, and vice versa.
*/
err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
diff --git a/tools/testing/selftests/bpf/prog_tests/stream.c b/tools/testing/selftests/bpf/prog_tests/stream.c
new file mode 100644
index 000000000000..c3cce5c292bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stream.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+
+#include "stream.skel.h"
+#include "stream_fail.skel.h"
+
+void test_stream_failure(void)
+{
+ RUN_TESTS(stream_fail);
+}
+
+void test_stream_success(void)
+{
+ RUN_TESTS(stream);
+ return;
+}
+
+void test_stream_syscall(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ struct stream *skel;
+ int ret, prog_fd;
+ char buf[64];
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.stream_syscall);
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ ASSERT_LT(bpf_prog_stream_read(0, BPF_STREAM_STDOUT, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EINVAL, "bad prog_fd");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, 0, buf, sizeof(buf), &ropts), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -ENOENT, "bad stream id");
+
+ ASSERT_LT(bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, NULL, sizeof(buf), NULL), 0, "error");
+ ret = -errno;
+ ASSERT_EQ(ret, -EFAULT, "bad stream buf");
+
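+	/* Drain what the program wrote to BPF_STREAM_STDOUT in two partial
+	 * reads, then verify that both streams are empty.
+	 */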
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 2, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 2, NULL);
+ ASSERT_EQ(ret, 1, "bytes");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDOUT, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stdout");
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, 1, &ropts);
+ ASSERT_EQ(ret, 0, "no bytes stderr");
+
+ stream__destroy(skel);
+}
+
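+/* Run a program that faults on an arena access and check that the fault
+ * address it recorded in fault_addr shows up in its BPF_STREAM_STDERR output.
+ */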
+static void test_address(struct bpf_program *prog, unsigned long *fault_addr_p)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret, prog_fd;
+ char fault_addr[64];
+ char buf[1024];
+
+ prog_fd = bpf_program__fd(prog);
+
+ ret = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+
+ sprintf(fault_addr, "0x%lx", *fault_addr_p);
+
+ ret = bpf_prog_stream_read(prog_fd, BPF_STREAM_STDERR, buf, sizeof(buf), &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ ASSERT_LE(ret, 1023, "len for buf");
+ buf[ret] = '\0';
+
+ if (!ASSERT_HAS_SUBSTR(buf, fault_addr, "fault_addr")) {
+ fprintf(stderr, "Output from stream:\n%s\n", buf);
+ fprintf(stderr, "Fault Addr: %s\n", fault_addr);
+ }
+}
+
+void test_stream_arena_fault_address(void)
+{
+ struct stream *skel;
+
+#if !defined(__x86_64__) && !defined(__aarch64__)
+ printf("%s:SKIP: arena fault reporting not supported\n", __func__);
+ test__skip();
+ return;
+#endif
+
+ skel = stream__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "stream__open_and_load"))
+ return;
+
+ if (test__start_subtest("read_fault"))
+ test_address(skel->progs.stream_arena_read_fault, &skel->bss->fault_addr);
+ if (test__start_subtest("write_fault"))
+ test_address(skel->progs.stream_arena_write_fault, &skel->bss->fault_addr);
+
+ stream__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
new file mode 100644
index 000000000000..0f3bf594e7a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/string_kfuncs.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include <test_progs.h>
+#include "string_kfuncs_success.skel.h"
+#include "string_kfuncs_failure1.skel.h"
+#include "string_kfuncs_failure2.skel.h"
+#include <sys/mman.h>
+
+static const char * const test_cases[] = {
+ "strcmp",
+ "strcasecmp",
+ "strchr",
+ "strchrnul",
+ "strnchr",
+ "strrchr",
+ "strlen",
+ "strnlen",
+ "strspn_str",
+ "strspn_accept",
+ "strcspn_str",
+ "strcspn_reject",
+ "strstr",
+ "strcasestr",
+ "strnstr",
+ "strncasestr",
+};
+
+void run_too_long_tests(void)
+{
+ struct string_kfuncs_failure2 *skel;
+ struct bpf_program *prog;
+ char test_name[256];
+ int err, i;
+
+ skel = string_kfuncs_failure2__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "string_kfuncs_failure2__open_and_load"))
+ return;
+
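+	/* Fill long_str completely so the *_too_long programs operate on an
+	 * over-long string; each of them is expected to return -E2BIG.
+	 */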
+ memset(skel->bss->long_str, 'a', sizeof(skel->bss->long_str));
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ sprintf(test_name, "test_%s_too_long", test_cases[i]);
+ if (!test__start_subtest(test_name))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, test_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, -E2BIG, "reading too long string fails with -E2BIG");
+ }
+
+cleanup:
+ string_kfuncs_failure2__destroy(skel);
+}
+
+void test_string_kfuncs(void)
+{
+ RUN_TESTS(string_kfuncs_success);
+ RUN_TESTS(string_kfuncs_failure1);
+
+ run_too_long_tests();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/summarization.c b/tools/testing/selftests/bpf/prog_tests/summarization.c
new file mode 100644
index 000000000000..5dd6c120a838
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/summarization.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include "summarization_freplace.skel.h"
+#include "summarization.skel.h"
+#include <test_progs.h>
+
+static void print_verifier_log(const char *log)
+{
+ if (env.verbosity >= VERBOSE_VERY)
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log);
+}
+
+static void test_aux(const char *main_prog_name,
+ const char *to_be_replaced,
+ const char *replacement,
+ bool expect_load,
+ const char *err_msg)
+{
+ struct summarization_freplace *freplace = NULL;
+ struct bpf_program *freplace_prog = NULL;
+ struct bpf_program *main_prog = NULL;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct summarization *main = NULL;
+ char log[16*1024];
+ int err;
+
+ opts.kernel_log_buf = log;
+ opts.kernel_log_size = sizeof(log);
+ if (env.verbosity >= VERBOSE_SUPER)
+ opts.kernel_log_level = 1 | 2 | 4;
+ main = summarization__open_opts(&opts);
+ if (!ASSERT_OK_PTR(main, "summarization__open"))
+ goto out;
+ main_prog = bpf_object__find_program_by_name(main->obj, main_prog_name);
+ if (!ASSERT_OK_PTR(main_prog, "main_prog"))
+ goto out;
+ bpf_program__set_autoload(main_prog, true);
+ err = summarization__load(main);
+ print_verifier_log(log);
+ if (!ASSERT_OK(err, "summarization__load"))
+ goto out;
+ freplace = summarization_freplace__open_opts(&opts);
+ if (!ASSERT_OK_PTR(freplace, "summarization_freplace__open"))
+ goto out;
+ freplace_prog = bpf_object__find_program_by_name(freplace->obj, replacement);
+ if (!ASSERT_OK_PTR(freplace_prog, "freplace_prog"))
+ goto out;
+ bpf_program__set_autoload(freplace_prog, true);
+ bpf_program__set_autoattach(freplace_prog, true);
+ bpf_program__set_attach_target(freplace_prog,
+ bpf_program__fd(main_prog),
+ to_be_replaced);
+ err = summarization_freplace__load(freplace);
+ print_verifier_log(log);
+
+ /* The might_sleep extension doesn't work yet as sleepable calls are not
+ * allowed, but preserve the check in case it's supported later and then
+ * this particular combination can be enabled.
+ */
+ if (!strcmp("might_sleep", replacement) && err) {
+ ASSERT_HAS_SUBSTR(log, "helper call might sleep in a non-sleepable prog", "error log");
+ ASSERT_EQ(err, -EINVAL, "err");
+ test__skip();
+ goto out;
+ }
+
+ if (expect_load) {
+ ASSERT_OK(err, "summarization_freplace__load");
+ } else {
+ ASSERT_ERR(err, "summarization_freplace__load");
+ ASSERT_HAS_SUBSTR(log, err_msg, "error log");
+ }
+
+out:
+ summarization_freplace__destroy(freplace);
+ summarization__destroy(main);
+}
+
+/* There are two global subprograms in both summarization.skel.h and
+ * summarization_freplace.skel.h:
+ * - one changes packet data;
+ * - another does not.
+ * It is ok to freplace subprograms that change packet data with those
+ * that either do or do not. It is only ok to freplace subprograms
+ * that do not change packet data with those that do not as well.
+ * The below tests check outcomes for each combination of such freplace.
+ * Also test a case when main subprogram itself is replaced and is a single
+ * subprogram in a program.
+ *
+ * The same rules hold for might_sleep programs. It is ok to replace might_sleep with
+ * might_sleep and with does_not_sleep, but does_not_sleep cannot be replaced
+ * with might_sleep.
+ */
+void test_summarization_freplace(void)
+{
+ struct {
+ const char *main;
+ const char *to_be_replaced;
+ bool has_side_effect;
+ } mains[2][4] = {
+ {
+ { "main_changes_with_subprogs", "changes_pkt_data", true },
+ { "main_changes_with_subprogs", "does_not_change_pkt_data", false },
+ { "main_changes", "main_changes", true },
+ { "main_does_not_change", "main_does_not_change", false },
+ },
+ {
+ { "main_might_sleep_with_subprogs", "might_sleep", true },
+ { "main_might_sleep_with_subprogs", "does_not_sleep", false },
+ { "main_might_sleep", "main_might_sleep", true },
+ { "main_does_not_sleep", "main_does_not_sleep", false },
+ },
+ };
+ const char *pkt_err = "Extension program changes packet data";
+ const char *slp_err = "Extension program may sleep";
+ struct {
+ const char *func;
+ bool has_side_effect;
+ const char *err_msg;
+ } replacements[2][2] = {
+ {
+ { "changes_pkt_data", true, pkt_err },
+ { "does_not_change_pkt_data", false, pkt_err },
+ },
+ {
+ { "might_sleep", true, slp_err },
+ { "does_not_sleep", false, slp_err },
+ },
+ };
+ char buf[64];
+
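+	/* Loading is expected to succeed unless a side-effect-free target
+	 * (does not change packet data / does not sleep) is replaced by an
+	 * extension that does have the side effect.
+	 */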
+ for (int t = 0; t < 2; t++) {
+	for (int i = 0; i < ARRAY_SIZE(mains[0]); ++i) {
+	for (int j = 0; j < ARRAY_SIZE(replacements[0]); ++j) {
+ snprintf(buf, sizeof(buf), "%s_with_%s",
+ mains[t][i].to_be_replaced, replacements[t][j].func);
+ if (!test__start_subtest(buf))
+ continue;
+ test_aux(mains[t][i].main, mains[t][i].to_be_replaced, replacements[t][j].func,
+ mains[t][i].has_side_effect || !replacements[t][j].has_side_effect,
+ replacements[t][j].err_msg);
+ }
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 544144620ca6..0ab36503c3b2 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1195,7 +1195,7 @@ static void test_tailcall_hierarchy_count(const char *which, bool test_fentry,
bool test_fexit,
bool test_fentry_entry)
{
- int err, map_fd, prog_fd, main_data_fd, fentry_data_fd, fexit_data_fd, i, val;
+ int err, map_fd, prog_fd, main_data_fd, fentry_data_fd = 0, fexit_data_fd = 0, i, val;
struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
struct bpf_program *prog, *fentry_prog;
@@ -1600,6 +1600,7 @@ static void test_tailcall_bpf2bpf_freplace(void)
goto out;
err = bpf_link__destroy(freplace_link);
+ freplace_link = NULL;
if (!ASSERT_OK(err, "destroy link"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
new file mode 100644
index 000000000000..2de38776a2d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h
@@ -0,0 +1,386 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_H
+#define __TASK_LOCAL_DATA_H
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdatomic.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+#include <pthread.h>
+#endif
+
+#include <bpf/bpf.h>
+
+/*
+ * OPTIONS
+ *
+ * Define the option before including the header
+ *
+ * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically
+ *
+ * Thread-specific memory for storing TLDs is allocated lazily on the first call to
+ * tld_get_data(). The thread that calls it must also call tld_free() on thread exit
+ * to prevent a memory leak. pthread.h will be included if the option is defined, and
+ * a pthread key will be registered with a destructor that calls tld_free().
+ *
+ *
+ * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically
+ * (default: 64 bytes)
+ *
+ * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using
+ * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot
+ * possibly be known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated
+ * for these TLDs. This additional memory is allocated for every thread that calls
+ * tld_get_data() even if tld_create_key() is never actually called, so be mindful of
+ * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible, as just enough memory
+ * will be allocated for TLDs created with it.
+ *
+ *
+ * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62)
+ *
+ * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store,
+ * TLD_MAX_DATA_CNT.
+ *
+ *
+ * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc()
+ *
+ * When allocating the memory for storing TLDs, we need to make sure there is a memory
+ * region of X bytes within a page. This is due to the limit posed by UPTR: memory
+ * pinned to the kernel cannot exceed a page nor can it cross a page boundary. The
+ * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses
+ * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage,
+ * as not all memory allocators can use the exact amount of memory requested to fulfill
+ * aligned_alloc(). For example, some may round the size up to the alignment. Enable the
+ * option to always use aligned_alloc() if the implementation has low memory overhead.
+ */
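+
+/*
+ * A minimal usage sketch (illustrative only, not part of the library): the
+ * options above must be defined before including this header, e.g.
+ *
+ *   #define TLD_FREE_DATA_ON_THREAD_EXIT
+ *   #define TLD_DYN_DATA_SIZE 4096
+ *   #include "task_local_data.h"
+ */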
+
+#define TLD_PAGE_SIZE getpagesize()
+#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1))
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#ifndef TLD_DYN_DATA_SIZE
+#define TLD_DYN_DATA_SIZE 64
+#endif
+
+#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ _Atomic __u16 size;
+};
+
+struct tld_meta_u {
+ _Atomic __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[];
+};
+
+struct tld_map_value {
+ void *data;
+ struct tld_meta_u *meta;
+};
+
+struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak));
+__thread struct tld_data_u *tld_data_p __attribute__((weak));
+__thread void *tld_data_alloc_p __attribute__((weak));
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+pthread_key_t tld_pthread_key __attribute__((weak));
+
+static void tld_free(void);
+
+static void __tld_thread_exit_handler(void *unused)
+{
+ tld_free();
+}
+#endif
+
+static int __tld_init_meta_p(void)
+{
+ struct tld_meta_u *meta, *uninit = NULL;
+ int err = 0;
+
+ meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE);
+ if (!meta) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ memset(meta, 0, TLD_PAGE_SIZE);
+ meta->size = TLD_DYN_DATA_SIZE;
+
+ if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) {
+ free(meta);
+ goto out;
+ }
+
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler);
+#endif
+out:
+ return err;
+}
+
+static int __tld_init_data_p(int map_fd)
+{
+ bool use_aligned_alloc = false;
+ struct tld_map_value map_val;
+ struct tld_data_u *data;
+ void *data_alloc = NULL;
+ int err, tid_fd = -1;
+
+ tid_fd = syscall(SYS_pidfd_open, sys_gettid(), O_EXCL);
+ if (tid_fd < 0) {
+ err = -errno;
+ goto out;
+ }
+
+#ifdef TLD_DATA_USE_ALIGNED_ALLOC
+ use_aligned_alloc = true;
+#endif
+
+ /*
+ * tld_meta_p->size = TLD_DYN_DATA_SIZE +
+ * total size of TLDs defined via TLD_DEFINE_KEY()
+ */
+ data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ?
+ aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) :
+ malloc(tld_meta_p->size * 2);
+ if (!data_alloc) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ /*
+ * Always pass a page-aligned address to UPTR since the size of tld_map_value::data
+ * is a page in BTF. If data_alloc spans two pages, use the page that has enough
+ * room for the TLDs.
+ */
+ if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) {
+ map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc);
+ data = data_alloc;
+ data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) +
+ offsetof(struct tld_data_u, data);
+ } else {
+ map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE));
+ data->start = offsetof(struct tld_data_u, data);
+ }
+ map_val.meta = TLD_READ_ONCE(tld_meta_p);
+
+ err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0);
+ if (err) {
+ free(data_alloc);
+ goto out;
+ }
+
+ tld_data_p = data;
+ tld_data_alloc_p = data_alloc;
+#ifdef TLD_FREE_DATA_ON_THREAD_EXIT
+ pthread_setspecific(tld_pthread_key, (void *)1);
+#endif
+out:
+ if (tid_fd >= 0)
+ close(tid_fd);
+ return err;
+}
+
+static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data)
+{
+ int err, i, sz, off = 0;
+ __u8 cnt;
+
+ if (!TLD_READ_ONCE(tld_meta_p)) {
+ err = __tld_init_meta_p();
+ if (err)
+ return (tld_key_t){err};
+ }
+
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+retry:
+ cnt = atomic_load(&tld_meta_p->cnt);
+ if (i < cnt) {
+ /* A metadata entry is not ready until its size is updated with a non-zero value */
+ while (!(sz = atomic_load(&tld_meta_p->metadata[i].size)))
+ sched_yield();
+
+ if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN))
+ return (tld_key_t){-EEXIST};
+
+ off += TLD_ROUND_UP(sz, 8);
+ continue;
+ }
+
+ /*
+ * TLD_DEFINE_KEY() may use memory up to a page, while at most
+ * TLD_DYN_DATA_SIZE bytes are allocated for tld_create_key()
+ */
+ if (dyn_data) {
+ if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size)
+ return (tld_key_t){-E2BIG};
+ } else {
+ if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u))
+ return (tld_key_t){-E2BIG};
+ tld_meta_p->size += TLD_ROUND_UP(size, 8);
+ }
+
+ /*
+ * Only one tld_create_key() can increase the current cnt by one and
+ * take the latest available slot. Other threads will check again whether a new
+ * TLD can still be added, and then compete for the next slot after the
+ * succeeding thread updates the size.
+ */
+ if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1))
+ goto retry;
+
+ strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN);
+ atomic_store(&tld_meta_p->metadata[i].size, size);
+ return (tld_key_t){(__s16)off};
+ }
+
+ return (tld_key_t){-ENOSPC};
+}
+
+/**
+ * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD.
+ *
+ * @name: The name of the TLD. Cannot exceed TLD_NAME_LEN
+ * @size: The size of the TLD
+ * @key: The variable name of the key
+ *
+ * The macro can only be used in file scope.
+ *
+ * A global variable key of opaque type, tld_key_t, will be declared and initialized before
+ * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key
+ * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD.
+ * bpf programs can also fetch the same key by name.
+ *
+ * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just
+ * enough memory will be allocated for each thread on the first call to tld_get_data().
+ */
+#define TLD_DEFINE_KEY(key, name, size) \
+tld_key_t key; \
+ \
+__attribute__((constructor)) \
+void __tld_define_key_##key(void) \
+{ \
+ key = __tld_create_key(name, size, false); \
+}
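+
+/*
+ * Usage sketch (names are illustrative, not part of this header): define the
+ * key at file scope, then check it once constructors have run, e.g. in main()
+ * or a test body:
+ *
+ *   TLD_DEFINE_KEY(prio_key, "priority", sizeof(int));
+ *
+ *   if (tld_key_is_err(prio_key))
+ *           return tld_key_err_or_zero(prio_key);
+ */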
+
+/**
+ * tld_create_key() - Create a TLD and return a key associated with the TLD.
+ *
+ * @name: The name of the TLD
+ * @size: The size of the TLD
+ *
+ * Return an opaque key object. Use tld_key_is_err() or tld_key_err_or_zero() to check
+ * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to
+ * the TLD. bpf programs can also fetch the same key by name.
+ *
+ * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is
+ * not known statically or a TLD needs to be created conditionally).
+ *
+ * An additional TLD_DYN_DATA_SIZE bytes are allocated per thread to accommodate TLDs
+ * created dynamically with tld_create_key(). Since only a user page is pinned to the
+ * kernel, when TLDs created with TLD_DEFINE_KEY() use more than TLD_PAGE_SIZE -
+ * TLD_DYN_DATA_SIZE bytes, the space for dynamic TLDs is limited to the rest of the page.
+ */
+__attribute__((unused))
+static tld_key_t tld_create_key(const char *name, size_t size)
+{
+ return __tld_create_key(name, size, true);
+}
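+
+/*
+ * Usage sketch (the "stats" name and struct stats are made up for
+ * illustration):
+ *
+ *   tld_key_t key = tld_create_key("stats", sizeof(struct stats));
+ *
+ *   if (tld_key_is_err(key))
+ *           return tld_key_err_or_zero(key);
+ */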
+
+__attribute__((unused))
+static inline bool tld_key_is_err(tld_key_t key)
+{
+ return key.off < 0;
+}
+
+__attribute__((unused))
+static inline int tld_key_err_or_zero(tld_key_t key)
+{
+ return tld_key_is_err(key) ? key.off : 0;
+}
+
+/**
+ * tld_get_data() - Get a pointer to the TLD associated with the given key of the
+ * calling thread.
+ *
+ * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map
+ * of task local data.
+ * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key().
+ *
+ * Return a pointer to the TLD if the key is valid; NULL if there is not enough memory
+ * for this thread's TLDs or the key is invalid. The returned pointer is guaranteed to be
+ * 8-byte aligned.
+ *
+ * Threads that call tld_get_data() must call tld_free() on exit to prevent a
+ * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined.
+ */
+__attribute__((unused))
+static void *tld_get_data(int map_fd, tld_key_t key)
+{
+ if (!TLD_READ_ONCE(tld_meta_p))
+ return NULL;
+
+ /* tld_data_p is allocated on the first invocation of tld_get_data() */
+ if (!tld_data_p && __tld_init_data_p(map_fd))
+ return NULL;
+
+ return tld_data_p->data + key.off;
+}
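+
+/*
+ * Usage sketch (map_fd and prio_key are assumed to come from the caller):
+ * read and update the calling thread's TLD. The pointer stays valid until the
+ * same thread calls tld_free().
+ *
+ *   int *prio = tld_get_data(map_fd, prio_key);
+ *
+ *   if (prio)
+ *           *prio += 1;
+ */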
+
+/**
+ * tld_free() - Free task local data memory of the calling thread
+ *
+ * For the calling thread, all previously acquired TLD pointers become invalid.
+ *
+ * Users must call tld_free() on thread exit to prevent a memory leak. Alternatively,
+ * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered
+ * to free the memory automatically.
+ */
+__attribute__((unused))
+static void tld_free(void)
+{
+ if (tld_data_alloc_p) {
+ free(tld_data_alloc_p);
+ tld_data_alloc_p = NULL;
+ tld_data_p = NULL;
+ }
+}
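+
+/*
+ * Usage sketch (worker() is a hypothetical thread routine): when
+ * TLD_FREE_DATA_ON_THREAD_EXIT is not defined, the thread frees its storage
+ * before exiting.
+ *
+ *   static void *worker(void *arg)
+ *   {
+ *           ... tld_get_data() and per-thread work ...
+ *           tld_free();
+ *           return NULL;
+ *   }
+ */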
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __TASK_LOCAL_DATA_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
index 60f474d965a9..42e822ea352f 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -197,7 +197,7 @@ static void test_nodeadlock(void)
/* Unnecessary recursion and deadlock detection are reproducible
* in the preemptible kernel.
*/
- if (!skel->kconfig->CONFIG_PREEMPT) {
+ if (!skel->kconfig->CONFIG_PREEMPTION) {
test__skip();
goto done;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_work_stress.c b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
new file mode 100644
index 000000000000..450d17d91a56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_work_stress.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work_stress.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <stdlib.h>
+#include <stdatomic.h>
+
+struct test_data {
+ int prog_fd;
+ atomic_int exit;
+};
+
+void *runner(void *test_data)
+{
+ struct test_data *td = test_data;
+ int err = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ while (!err && !atomic_load(&td->exit))
+ err = bpf_prog_test_run_opts(td->prog_fd, &opts);
+
+ return NULL;
+}
+
+static int get_env_int(const char *str, int def)
+{
+ const char *s = getenv(str);
+ char *end;
+ int retval;
+
+ if (!s || !*s)
+ return def;
+ errno = 0;
+ retval = strtol(s, &end, 10);
+ if (errno || *end || retval < 0)
+ return def;
+ return retval;
+}
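+
+/*
+ * The stress duration can be overridden from the environment, e.g. (assumed
+ * invocation): BPF_TASK_WORK_TEST_TIME=5 ./test_progs -t task_work_stress
+ */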
+
+static void task_work_run(bool enable_delete)
+{
+ struct task_work_stress *skel;
+ struct bpf_program *scheduler, *deleter;
+ int nthreads = 16;
+ int test_time_s = get_env_int("BPF_TASK_WORK_TEST_TIME", 1);
+ pthread_t tid[nthreads], tid_del;
+ bool started[nthreads], started_del = false;
+ struct test_data td_sched = { .exit = 0 }, td_del = { .exit = 1 };
+ int i, err;
+
+ skel = task_work_stress__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ scheduler = bpf_object__find_program_by_name(skel->obj, "schedule_task_work");
+ bpf_program__set_autoload(scheduler, true);
+
+ deleter = bpf_object__find_program_by_name(skel->obj, "delete_task_work");
+ bpf_program__set_autoload(deleter, true);
+
+ err = task_work_stress__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ for (i = 0; i < nthreads; ++i)
+ started[i] = false;
+
+ td_sched.prog_fd = bpf_program__fd(scheduler);
+ for (i = 0; i < nthreads; ++i) {
+ if (pthread_create(&tid[i], NULL, runner, &td_sched) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started[i] = true;
+ }
+
+ if (enable_delete)
+ atomic_store(&td_del.exit, 0);
+
+ td_del.prog_fd = bpf_program__fd(deleter);
+ if (pthread_create(&tid_del, NULL, runner, &td_del) != 0) {
+ fprintf(stderr, "could not start thread");
+ goto cancel;
+ }
+ started_del = true;
+
+ /* Run stress test for some time */
+ sleep(test_time_s);
+
+cancel:
+ atomic_store(&td_sched.exit, 1);
+ atomic_store(&td_del.exit, 1);
+ for (i = 0; i < nthreads; ++i) {
+ if (started[i])
+ pthread_join(tid[i], NULL);
+ }
+
+ if (started_del)
+ pthread_join(tid_del, NULL);
+
+ ASSERT_GT(skel->bss->callback_scheduled, 0, "work scheduled");
+ /* Some scheduling attempts should have failed due to contention */
+ ASSERT_GT(skel->bss->schedule_error, 0, "schedule error");
+
+ if (enable_delete) {
+ /* If the delete thread is enabled, it should have cancelled some callbacks */
+ ASSERT_GT(skel->bss->delete_success, 0, "delete success");
+ ASSERT_LT(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ } else {
+ /* Without the delete thread, the number of scheduled callbacks equals the number fired */
+ ASSERT_EQ(skel->bss->callback_success, skel->bss->callback_scheduled, "callbacks");
+ }
+
+cleanup:
+ task_work_stress__destroy(skel);
+}
+
+void test_task_work_stress(void)
+{
+ if (test__start_subtest("no_delete"))
+ task_work_run(false);
+ if (test__start_subtest("with_delete"))
+ task_work_run(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
new file mode 100644
index 000000000000..74752233e779
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_change_tail.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <error.h>
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "test_tc_change_tail.skel.h"
+#include "socket_helpers.h"
+
+#define LO_IFINDEX 1
+
+void test_tc_change_tail(void)
+{
+ LIBBPF_OPTS(bpf_tcx_opts, tcx_opts);
+ struct test_tc_change_tail *skel = NULL;
+ struct bpf_link *link;
+ int c1, p1;
+ char buf[2];
+ int ret;
+
+ skel = test_tc_change_tail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_change_tail__open_and_load"))
+ return;
+
+ link = bpf_program__attach_tcx(skel->progs.change_tail, LO_IFINDEX,
+ &tcx_opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_tcx"))
+ goto destroy;
+
+ skel->links.change_tail = link;
+ ret = create_pair(AF_INET, SOCK_DGRAM, &c1, &p1);
+ if (!ASSERT_OK(ret, "create_pair"))
+ goto destroy;
+
+ ret = xsend(p1, "Tr", 2, 0);
+ ASSERT_EQ(ret, 2, "xsend(p1)");
+ ret = recv(c1, buf, 2, 0);
+ ASSERT_EQ(ret, 2, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ ret = xsend(p1, "G", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 2, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, 0, "change_tail_ret");
+
+ ret = xsend(p1, "E", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 1, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+ ret = xsend(p1, "Z", 1, 0);
+ ASSERT_EQ(ret, 1, "xsend(p1)");
+ ret = recv(c1, buf, 1, 0);
+ ASSERT_EQ(ret, 1, "recv(c1)");
+ ASSERT_EQ(skel->data->change_tail_ret, -EINVAL, "change_tail_ret");
+
+ close(c1);
+ close(p1);
+destroy:
+ test_tc_change_tail__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
index 924d0e25320c..d52a62af77bf 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -8,34 +8,6 @@
# define loopback 1
#endif
-static inline __u32 id_from_prog_fd(int fd)
-{
- struct bpf_prog_info prog_info = {};
- __u32 prog_info_len = sizeof(prog_info);
- int err;
-
- err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
- if (!ASSERT_OK(err, "id_from_prog_fd"))
- return 0;
-
- ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
- return prog_info.id;
-}
-
-static inline __u32 id_from_link_fd(int fd)
-{
- struct bpf_link_info link_info = {};
- __u32 link_info_len = sizeof(link_info);
- int err;
-
- err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
- if (!ASSERT_OK(err, "id_from_link_fd"))
- return 0;
-
- ASSERT_NEQ(link_info.id, 0, "link_info.id");
- return link_info.id;
-}
-
static inline __u32 ifindex_from_link_fd(int fd)
{
struct bpf_link_info link_info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index 1af9ec1149aa..2186a24e7d8a 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -13,7 +13,7 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
-void serial_test_tc_links_basic(void)
+void test_ns_tc_links_basic(void)
{
LIBBPF_OPTS(bpf_prog_query_opts, optq);
LIBBPF_OPTS(bpf_tcx_opts, optl);
@@ -260,7 +260,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_before(void)
+void test_ns_tc_links_before(void)
{
test_tc_links_before_target(BPF_TCX_INGRESS);
test_tc_links_before_target(BPF_TCX_EGRESS);
@@ -414,7 +414,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_after(void)
+void test_ns_tc_links_after(void)
{
test_tc_links_after_target(BPF_TCX_INGRESS);
test_tc_links_after_target(BPF_TCX_EGRESS);
@@ -514,7 +514,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_revision(void)
+void test_ns_tc_links_revision(void)
{
test_tc_links_revision_target(BPF_TCX_INGRESS);
test_tc_links_revision_target(BPF_TCX_EGRESS);
@@ -618,7 +618,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_chain_classic(void)
+void test_ns_tc_links_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -846,7 +846,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_replace(void)
+void test_ns_tc_links_replace(void)
{
test_tc_links_replace_target(BPF_TCX_INGRESS);
test_tc_links_replace_target(BPF_TCX_EGRESS);
@@ -1158,7 +1158,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_invalid(void)
+void test_ns_tc_links_invalid(void)
{
test_tc_links_invalid_target(BPF_TCX_INGRESS);
test_tc_links_invalid_target(BPF_TCX_EGRESS);
@@ -1314,7 +1314,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_prepend(void)
+void test_ns_tc_links_prepend(void)
{
test_tc_links_prepend_target(BPF_TCX_INGRESS);
test_tc_links_prepend_target(BPF_TCX_EGRESS);
@@ -1470,7 +1470,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_append(void)
+void test_ns_tc_links_append(void)
{
test_tc_links_append_target(BPF_TCX_INGRESS);
test_tc_links_append_target(BPF_TCX_EGRESS);
@@ -1568,7 +1568,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_cleanup(void)
+void test_ns_tc_links_dev_cleanup(void)
{
test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1672,7 +1672,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_links_chain_mixed(void)
+void test_ns_tc_links_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -1782,7 +1782,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_links_ingress(void)
+void test_ns_tc_links_ingress(void)
{
test_tc_links_ingress(BPF_TCX_INGRESS, true, true);
test_tc_links_ingress(BPF_TCX_INGRESS, true, false);
@@ -1823,7 +1823,7 @@ static int qdisc_replace(int ifindex, const char *kind, bool block)
return err;
}
-void serial_test_tc_links_dev_chain0(void)
+void test_ns_tc_links_dev_chain0(void)
{
int err, ifindex;
@@ -1955,7 +1955,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_links_dev_mixed(void)
+void test_ns_tc_links_dev_mixed(void)
{
test_tc_links_dev_mixed(BPF_TCX_INGRESS);
test_tc_links_dev_mixed(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 151a4210028f..2461d183dee5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -14,10 +14,16 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
+#define NETKIT_HEADROOM 32
+#define NETKIT_TAILROOM 8
+
#define MARK 42
#define PRIO 0xeb9f
#define ICMP_ECHO 8
+#define FLAG_ADJUST_ROOM (1 << 0)
+#define FLAG_SAME_NETNS (1 << 1)
+
struct icmphdr {
__u8 type;
__u8 code;
@@ -35,7 +41,7 @@ struct iplink_req {
};
static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
- bool same_netns, int scrub, int peer_scrub)
+ int scrub, int peer_scrub, __u32 flags)
{
struct rtnl_handle rth = { .fd = -1 };
struct iplink_req req = {};
@@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ if (flags & FLAG_ADJUST_ROOM) {
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM);
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM);
+ }
addattr_nest_end(&req.n, data);
addattr_nest_end(&req.n, linkinfo);
@@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
" addr ee:ff:bb:cc:aa:dd"),
"set hwaddress");
}
- if (same_netns) {
+ if (flags & FLAG_SAME_NETNS) {
ASSERT_OK(system("ip link set dev " netkit_peer " up"),
"up peer");
ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
@@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void)
int err, ifindex, ifindex2;
err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
- &ifindex, true, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
if (err)
return;
@@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode)
struct bpf_link *link;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, true, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
if (err)
return;
@@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void)
serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
}
-static void serial_test_tc_netkit_scrub_type(int scrub)
+static void serial_test_tc_netkit_scrub_type(int scrub, bool room)
{
LIBBPF_OPTS(bpf_netkit_opts, optl);
struct test_tc_link *skel;
@@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, scrub, scrub);
+ &ifindex, scrub, scrub,
+ room ? FLAG_ADJUST_ROOM : 0);
if (err)
return;
@@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
+ ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom");
+ ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom");
cleanup:
test_tc_link__destroy(skel);
@@ -852,6 +865,6 @@ cleanup:
void serial_test_tc_netkit_scrub(void)
{
- serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT);
- serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
index f77f604389aa..dd7a138d8c3d 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -10,7 +10,7 @@
#include "test_tc_link.skel.h"
#include "tc_helpers.h"
-void serial_test_tc_opts_basic(void)
+void test_ns_tc_opts_basic(void)
{
LIBBPF_OPTS(bpf_prog_attach_opts, opta);
LIBBPF_OPTS(bpf_prog_detach_opts, optd);
@@ -254,7 +254,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_before(void)
+void test_ns_tc_opts_before(void)
{
test_tc_opts_before_target(BPF_TCX_INGRESS);
test_tc_opts_before_target(BPF_TCX_EGRESS);
@@ -445,7 +445,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_after(void)
+void test_ns_tc_opts_after(void)
{
test_tc_opts_after_target(BPF_TCX_INGRESS);
test_tc_opts_after_target(BPF_TCX_EGRESS);
@@ -554,7 +554,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_revision(void)
+void test_ns_tc_opts_revision(void)
{
test_tc_opts_revision_target(BPF_TCX_INGRESS);
test_tc_opts_revision_target(BPF_TCX_EGRESS);
@@ -655,7 +655,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_chain_classic(void)
+void test_ns_tc_opts_chain_classic(void)
{
test_tc_chain_classic(BPF_TCX_INGRESS, false);
test_tc_chain_classic(BPF_TCX_EGRESS, false);
@@ -864,7 +864,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_replace(void)
+void test_ns_tc_opts_replace(void)
{
test_tc_opts_replace_target(BPF_TCX_INGRESS);
test_tc_opts_replace_target(BPF_TCX_EGRESS);
@@ -1017,7 +1017,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_invalid(void)
+void test_ns_tc_opts_invalid(void)
{
test_tc_opts_invalid_target(BPF_TCX_INGRESS);
test_tc_opts_invalid_target(BPF_TCX_EGRESS);
@@ -1157,7 +1157,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_prepend(void)
+void test_ns_tc_opts_prepend(void)
{
test_tc_opts_prepend_target(BPF_TCX_INGRESS);
test_tc_opts_prepend_target(BPF_TCX_EGRESS);
@@ -1297,7 +1297,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_append(void)
+void test_ns_tc_opts_append(void)
{
test_tc_opts_append_target(BPF_TCX_INGRESS);
test_tc_opts_append_target(BPF_TCX_EGRESS);
@@ -1387,7 +1387,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_dev_cleanup(void)
+void test_ns_tc_opts_dev_cleanup(void)
{
test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
@@ -1563,7 +1563,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_mixed(void)
+void test_ns_tc_opts_mixed(void)
{
test_tc_opts_mixed_target(BPF_TCX_INGRESS);
test_tc_opts_mixed_target(BPF_TCX_EGRESS);
@@ -1642,7 +1642,7 @@ cleanup:
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_demixed(void)
+void test_ns_tc_opts_demixed(void)
{
test_tc_opts_demixed_target(BPF_TCX_INGRESS);
test_tc_opts_demixed_target(BPF_TCX_EGRESS);
@@ -1813,7 +1813,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach(void)
+void test_ns_tc_opts_detach(void)
{
test_tc_opts_detach_target(BPF_TCX_INGRESS);
test_tc_opts_detach_target(BPF_TCX_EGRESS);
@@ -2020,7 +2020,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_before(void)
+void test_ns_tc_opts_detach_before(void)
{
test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
@@ -2236,7 +2236,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_detach_after(void)
+void test_ns_tc_opts_detach_after(void)
{
test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
@@ -2265,7 +2265,7 @@ static void test_tc_opts_delete_empty(int target, bool chain_tc_old)
assert_mprog_count(target, 0);
}
-void serial_test_tc_opts_delete_empty(void)
+void test_ns_tc_opts_delete_empty(void)
{
test_tc_opts_delete_empty(BPF_TCX_INGRESS, false);
test_tc_opts_delete_empty(BPF_TCX_EGRESS, false);
@@ -2372,7 +2372,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_chain_mixed(void)
+void test_ns_tc_opts_chain_mixed(void)
{
test_tc_chain_mixed(BPF_TCX_INGRESS);
test_tc_chain_mixed(BPF_TCX_EGRESS);
@@ -2446,7 +2446,7 @@ cleanup:
ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
}
-void serial_test_tc_opts_max(void)
+void test_ns_tc_opts_max(void)
{
test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
@@ -2748,7 +2748,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query(void)
+void test_ns_tc_opts_query(void)
{
test_tc_opts_query_target(BPF_TCX_INGRESS);
test_tc_opts_query_target(BPF_TCX_EGRESS);
@@ -2807,7 +2807,7 @@ cleanup:
test_tc_link__destroy(skel);
}
-void serial_test_tc_opts_query_attach(void)
+void test_ns_tc_opts_query_attach(void)
{
test_tc_opts_query_attach_target(BPF_TCX_INGRESS);
test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index c85798966aec..76d72a59365e 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -56,6 +56,8 @@
#define MAC_DST_FWD "00:11:22:33:44:55"
#define MAC_DST "00:22:33:44:55:66"
+#define MAC_SRC_FWD "00:33:44:55:66:77"
+#define MAC_SRC "00:44:55:66:77:88"
#define IFADDR_STR_LEN 18
#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
@@ -207,11 +209,10 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result)
int err;
if (result->dev_mode == MODE_VETH) {
- SYS(fail, "ip link add src type veth peer name src_fwd");
- SYS(fail, "ip link add dst type veth peer name dst_fwd");
-
- SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
- SYS(fail, "ip link set dst address " MAC_DST);
+ SYS(fail, "ip link add src address " MAC_SRC " type veth "
+ "peer name src_fwd address " MAC_SRC_FWD);
+ SYS(fail, "ip link add dst address " MAC_DST " type veth "
+ "peer name dst_fwd address " MAC_DST_FWD);
} else if (result->dev_mode == MODE_NETKIT) {
err = create_netkit(NETKIT_L3, "src", "src_fwd");
if (!ASSERT_OK(err, "create_ifindex_src"))
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
new file mode 100644
index 000000000000..de22734abc4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_smc.c
@@ -0,0 +1,390 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/genetlink.h>
+#include "network_helpers.h"
+#include "bpf_smc.skel.h"
+
+#ifndef IPPROTO_SMC
+#define IPPROTO_SMC 256
+#endif
+
+#define CLIENT_IP "127.0.0.1"
+#define SERVER_IP "127.0.1.0"
+#define SERVER_IP_VIA_RISK_PATH "127.0.2.0"
+
+#define SERVICE_1 80
+#define SERVICE_2 443
+#define SERVICE_3 8443
+
+#define TEST_NS "bpf_smc_netns"
+
+static struct netns_obj *test_netns;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+#if defined(__s390x__)
+/* s390x has default seid */
+static bool setup_ueid(void) { return true; }
+static void cleanup_ueid(void) {}
+#else
+enum {
+ SMC_NETLINK_ADD_UEID = 10,
+ SMC_NETLINK_REMOVE_UEID
+};
+
+enum {
+ SMC_NLA_EID_TABLE_UNSPEC,
+ SMC_NLA_EID_TABLE_ENTRY, /* string */
+};
+
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[1024];
+};
+
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
+
+#define SMC_GENL_FAMILY_NAME "SMC_GEN_NETLINK"
+#define SMC_BPFTEST_UEID "SMC-BPFTEST-UEID"
+
+static uint16_t smc_nl_family_id = -1;
+
+static int send_cmd(int fd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u16 nlmsg_flags, __u8 genl_cmd, __u16 nla_type,
+ void *nla_data, int nla_len)
+{
+ struct nlattr *na;
+ struct sockaddr_nl nladdr;
+ int r, buflen;
+ char *buf;
+
+ struct msgtemplate msg = {0};
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = nlmsg_flags;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 1;
+ na = (struct nlattr *)GENLMSG_DATA(&msg);
+ na->nla_type = nla_type;
+ na->nla_len = nla_len + 1 + NLA_HDRLEN;
+ memcpy(NLA_DATA(na), nla_data, nla_len);
+ msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
+
+ buf = (char *)&msg;
+ buflen = msg.n.nlmsg_len;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+
+ while ((r = sendto(fd, buf, buflen, 0, (struct sockaddr *)&nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static bool get_smc_nl_family_id(void)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlattr *nl;
+ int fd, ret;
+ pid_t pid;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "nl_family socket"))
+ return false;
+
+ pid = getpid();
+
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "nl_family bind"))
+ goto fail;
+
+ ret = send_cmd(fd, GENL_ID_CTRL, pid,
+ NLM_F_REQUEST, CTRL_CMD_GETFAMILY,
+ CTRL_ATTR_FAMILY_NAME, (void *)SMC_GENL_FAMILY_NAME,
+ strlen(SMC_GENL_FAMILY_NAME));
+ if (!ASSERT_OK(ret, "nl_family query"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(msg.n.nlmsg_type == NLMSG_ERROR || ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "nl_family response"))
+ goto fail;
+
+ nl = (struct nlattr *)GENLMSG_DATA(&msg);
+ nl = (struct nlattr *)((char *)nl + NLA_ALIGN(nl->nla_len));
+ if (!ASSERT_EQ(nl->nla_type, CTRL_ATTR_FAMILY_ID, "nl_family nla type"))
+ goto fail;
+
+ smc_nl_family_id = *(uint16_t *)NLA_DATA(nl);
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool smc_ueid(int op)
+{
+ struct sockaddr_nl nl_src;
+ struct msgtemplate msg;
+ struct nlmsgerr *err;
+ char test_ueid[32];
+ int fd, ret;
+ pid_t pid;
+
+ /* UEID required */
+ memset(test_ueid, '\x20', sizeof(test_ueid));
+ memcpy(test_ueid, SMC_BPFTEST_UEID, strlen(SMC_BPFTEST_UEID));
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (!ASSERT_OK_FD(fd, "ueid socket"))
+ return false;
+
+ pid = getpid();
+ memset(&nl_src, 0, sizeof(nl_src));
+ nl_src.nl_family = AF_NETLINK;
+ nl_src.nl_pid = pid;
+
+ ret = bind(fd, (struct sockaddr *)&nl_src, sizeof(nl_src));
+ if (!ASSERT_OK(ret, "ueid bind"))
+ goto fail;
+
+ ret = send_cmd(fd, smc_nl_family_id, pid,
+ NLM_F_REQUEST | NLM_F_ACK, op, SMC_NLA_EID_TABLE_ENTRY,
+ (void *)test_ueid, sizeof(test_ueid));
+ if (!ASSERT_OK(ret, "ueid cmd"))
+ goto fail;
+
+ ret = recv(fd, &msg, sizeof(msg), 0);
+ if (!ASSERT_FALSE(ret < 0 ||
+ !NLMSG_OK(&msg.n, ret), "ueid response"))
+ goto fail;
+
+ if (msg.n.nlmsg_type == NLMSG_ERROR) {
+ err = NLMSG_DATA(&msg);
+ switch (op) {
+ case SMC_NETLINK_REMOVE_UEID:
+ if (!ASSERT_FALSE((err->error && err->error != -ENOENT),
+ "ueid remove"))
+ goto fail;
+ break;
+ case SMC_NETLINK_ADD_UEID:
+ if (!ASSERT_OK(err->error, "ueid add"))
+ goto fail;
+ break;
+ default:
+ break;
+ }
+ }
+ close(fd);
+ return true;
+fail:
+ close(fd);
+ return false;
+}
+
+static bool setup_ueid(void)
+{
+ /* get smc nl id */
+ if (!get_smc_nl_family_id())
+ return false;
+ /* clear old ueid for bpftest */
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+ /* smc-loopback required ueid */
+ return smc_ueid(SMC_NETLINK_ADD_UEID);
+}
+
+static void cleanup_ueid(void)
+{
+ smc_ueid(SMC_NETLINK_REMOVE_UEID);
+}
+#endif /* __s390x__ */
+
+static bool setup_netns(void)
+{
+ test_netns = netns_new(TEST_NS, true);
+ if (!ASSERT_OK_PTR(test_netns, "open net namespace"))
+ goto fail_netns;
+
+ SYS(fail_ip, "ip addr add 127.0.1.0/8 dev lo");
+ SYS(fail_ip, "ip addr add 127.0.2.0/8 dev lo");
+
+ return true;
+fail_ip:
+ netns_free(test_netns);
+fail_netns:
+ return false;
+}
+
+static void cleanup_netns(void)
+{
+ netns_free(test_netns);
+}
+
+static bool setup_smc(void)
+{
+ if (!setup_ueid())
+ return false;
+
+ if (!setup_netns())
+ goto fail_netns;
+
+ return true;
+fail_netns:
+ cleanup_ueid();
+ return false;
+}
+
+static int set_client_addr_cb(int fd, void *opts)
+{
+ const char *src = (const char *)opts;
+ struct sockaddr_in localaddr;
+
+ localaddr.sin_family = AF_INET;
+ localaddr.sin_port = htons(0);
+ localaddr.sin_addr.s_addr = inet_addr(src);
+ return !ASSERT_OK(bind(fd, (struct sockaddr *)&localaddr, sizeof(localaddr)), "client bind");
+}
+
+static void run_link(const char *src, const char *dst, int port)
+{
+ struct network_helper_opts opts = {0};
+ int server, client;
+
+ server = start_server_str(AF_INET, SOCK_STREAM, dst, port, NULL);
+ if (!ASSERT_OK_FD(server, "start service_1"))
+ return;
+
+ opts.proto = IPPROTO_TCP;
+ opts.post_socket_cb = set_client_addr_cb;
+ opts.cb_opts = (void *)src;
+
+ client = connect_to_fd_opts(server, &opts);
+ if (!ASSERT_OK_FD(client, "start connect"))
+ goto fail_client;
+
+ close(client);
+fail_client:
+ close(server);
+}
+
+static void block_link(int map_fd, const char *src, const char *dst)
+{
+ struct smc_policy_ip_value val = { .mode = /* block */ 0 };
+ struct smc_policy_ip_key key = {
+ .sip = inet_addr(src),
+ .dip = inet_addr(dst),
+ };
+
+ bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+}
+
+/*
+ * This test describes a real-life service topology as follows:
+ *
+ * +-------------> service_1
+ * link 1 | |
+ * +--------------------> server | link 2
+ * | | V
+ * | +-------------> service_2
+ * | link 3
+ * client -------------------> server_via_unsafe_path -> service_3
+ *
+ * Among them,
+ * 1. link-1 is very suitable for using SMC.
+ * 2. link-2 is not suitable for using SMC, because this link mostly carries
+ * short-lived connections.
+ * 3. link-3 is also not suitable for using SMC, because the RDMA link is
+ * unavailable and needs to go through a long timeout before it can fall back
+ * to TCP.
+ * To achieve this goal, we use a customized SMC IP strategy via smc_hs_ctrl.
+ */
+static void test_topo(void)
+{
+ struct bpf_smc *skel;
+ int rc, map_fd;
+
+ skel = bpf_smc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_smc__open_and_load"))
+ return;
+
+ rc = bpf_smc__attach(skel);
+ if (!ASSERT_OK(rc, "bpf_smc__attach"))
+ goto fail;
+
+ map_fd = bpf_map__fd(skel->maps.smc_policy_ip);
+ if (!ASSERT_OK_FD(map_fd, "bpf_map__fd"))
+ goto fail;
+
+ /* Mock the process of transparent replacement: the protocol will be
+ * changed to IPPROTO_SMC accordingly via
+ * fmod_ret/update_socket_protocol.
+ */
+ write_sysctl("/proc/sys/net/smc/hs_ctrl", "linkcheck");
+
+ /* Configure the IP strategy */
+ block_link(map_fd, CLIENT_IP, SERVER_IP_VIA_RISK_PATH);
+ block_link(map_fd, SERVER_IP, SERVER_IP);
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_1);
+ /* should go with smc fallback */
+ run_link(SERVER_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 2, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc */
+ run_link(CLIENT_IP, SERVER_IP, SERVICE_2);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 3, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 1, "fallback count");
+
+ /* should go with smc fallback */
+ run_link(CLIENT_IP, SERVER_IP_VIA_RISK_PATH, SERVICE_3);
+
+ ASSERT_EQ(skel->bss->smc_cnt, 4, "smc count");
+ ASSERT_EQ(skel->bss->fallback_cnt, 2, "fallback count");
+
+fail:
+ bpf_smc__destroy(skel);
+}
+
+void test_bpf_smc(void)
+{
+ if (!setup_smc()) {
+ printf("setup for smc test failed, test SKIP:\n");
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("topo"))
+ test_topo();
+
+ cleanup_ueid();
+ cleanup_netns();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
new file mode 100644
index 000000000000..7d1b478c99a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_btf_ext.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms Inc. */
+#include <test_progs.h>
+#include "test_btf_ext.skel.h"
+#include "btf_helpers.h"
+
+static void subtest_line_func_info(void)
+{
+ struct test_btf_ext *skel;
+ struct bpf_prog_info info;
+ struct bpf_line_info line_info[128], *libbpf_line_info;
+ struct bpf_func_info func_info[128], *libbpf_func_info;
+ __u32 info_len = sizeof(info), libbbpf_line_info_cnt, libbbpf_func_info_cnt;
+ int err, fd;
+
+ skel = test_btf_ext__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.line_info = ptr_to_u64(&line_info);
+ info.nr_line_info = sizeof(line_info);
+ info.line_info_rec_size = sizeof(*line_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_line_info"))
+ goto out;
+
+ libbpf_line_info = bpf_program__line_info(skel->progs.global_func);
+ libbbpf_line_info_cnt = bpf_program__line_info_cnt(skel->progs.global_func);
+
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info);
+ info.nr_func_info = sizeof(func_info);
+ info.func_info_rec_size = sizeof(*func_info);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_func_info"))
+ goto out;
+
+ libbpf_func_info = bpf_program__func_info(skel->progs.global_func);
+ libbbpf_func_info_cnt = bpf_program__func_info_cnt(skel->progs.global_func);
+
+ if (!ASSERT_OK_PTR(libbpf_line_info, "bpf_program__line_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_line_info_cnt, info.nr_line_info, "line_info_cnt"))
+ goto out;
+ if (!ASSERT_OK_PTR(libbpf_func_info, "bpf_program__func_info"))
+ goto out;
+ if (!ASSERT_EQ(libbbpf_func_info_cnt, info.nr_func_info, "func_info_cnt"))
+ goto out;
+ ASSERT_MEMEQ(libbpf_line_info, line_info, libbbpf_line_info_cnt * sizeof(*line_info),
+ "line_info");
+ ASSERT_MEMEQ(libbpf_func_info, func_info, libbbpf_func_info_cnt * sizeof(*func_info),
+ "func_info");
+out:
+ test_btf_ext__destroy(skel);
+}
+
+void test_btf_ext(void)
+{
+ if (test__start_subtest("line_func_info"))
+ subtest_line_func_info();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2a27f3714f5c..bdc4fc06bc5a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -139,7 +139,7 @@ static void test_lsm_tailcall(void)
if (CHECK_FAIL(!err))
goto close_prog;
- prog_fd = bpf_program__fd(skel->progs.lsm_file_alloc_security_prog);
+ prog_fd = bpf_program__fd(skel->progs.lsm_kernfs_init_security_prog);
if (CHECK_FAIL(prog_fd < 0))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
new file mode 100644
index 000000000000..fd8762ba4b67
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_id_ops_mapping1.skel.h"
+#include "struct_ops_id_ops_mapping2.skel.h"
+
+static void test_st_ops_id_ops_mapping(void)
+{
+ struct struct_ops_id_ops_mapping1 *skel1 = NULL;
+ struct struct_ops_id_ops_mapping2 *skel2 = NULL;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ int err, pid, prog1_fd, prog2_fd;
+
+ skel1 = struct_ops_id_ops_mapping1__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open"))
+ goto out;
+
+ skel2 = struct_ops_id_ops_mapping2__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open"))
+ goto out;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel1->bss->st_ops_id = info.id;
+
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map),
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto out;
+
+ skel2->bss->st_ops_id = info.id;
+
+ err = struct_ops_id_ops_mapping1__attach(skel1);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach"))
+ goto out;
+
+ err = struct_ops_id_ops_mapping2__attach(skel2);
+ if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach"))
+ goto out;
+
+ /* run tracing prog that calls .test_1 and checks return */
+ pid = getpid();
+ skel1->bss->test_pid = pid;
+ skel2->bss->test_pid = pid;
+ sys_gettid();
+ skel1->bss->test_pid = 0;
+ skel2->bss->test_pid = 0;
+
+ /* run syscall_prog that calls .test_1 and checks return */
+ prog1_fd = bpf_program__fd(skel1->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog1_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ prog2_fd = bpf_program__fd(skel2->progs.syscall_prog);
+ err = bpf_prog_test_run_opts(prog2_fd, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err");
+ ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err");
+
+out:
+ struct_ops_id_ops_mapping1__destroy(skel1);
+ struct_ops_id_ops_mapping2__destroy(skel2);
+}
+
+void test_struct_ops_id_ops_mapping(void)
+{
+ if (test__start_subtest("st_ops_id_ops_mapping"))
+ test_st_ops_id_ops_mapping();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
new file mode 100644
index 000000000000..467cc72a3588
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_kptr_return.c
@@ -0,0 +1,16 @@
+#include <test_progs.h>
+
+#include "struct_ops_kptr_return.skel.h"
+#include "struct_ops_kptr_return_fail__wrong_type.skel.h"
+#include "struct_ops_kptr_return_fail__invalid_scalar.skel.h"
+#include "struct_ops_kptr_return_fail__nonzero_offset.skel.h"
+#include "struct_ops_kptr_return_fail__local_kptr.skel.h"
+
+void test_struct_ops_kptr_return(void)
+{
+ RUN_TESTS(struct_ops_kptr_return);
+ RUN_TESTS(struct_ops_kptr_return_fail__wrong_type);
+ RUN_TESTS(struct_ops_kptr_return_fail__invalid_scalar);
+ RUN_TESTS(struct_ops_kptr_return_fail__nonzero_offset);
+ RUN_TESTS(struct_ops_kptr_return_fail__local_kptr);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
new file mode 100644
index 000000000000..da60c715fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_refcounted.c
@@ -0,0 +1,14 @@
+#include <test_progs.h>
+
+#include "struct_ops_refcounted.skel.h"
+#include "struct_ops_refcounted_fail__ref_leak.skel.h"
+#include "struct_ops_refcounted_fail__global_subprog.skel.h"
+#include "struct_ops_refcounted_fail__tail_call.skel.h"
+
+void test_struct_ops_refcounted(void)
+{
+ RUN_TESTS(struct_ops_refcounted);
+ RUN_TESTS(struct_ops_refcounted_fail__ref_leak);
+ RUN_TESTS(struct_ops_refcounted_fail__global_subprog);
+ RUN_TESTS(struct_ops_refcounted_fail__tail_call);
+}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
index bcdbd27f22f0..273dd41ca09e 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_sysctl.c
@@ -1,22 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <fcntl.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include <bpf/bpf_endian.h>
-#include "bpf_util.h"
+#include "test_progs.h"
#include "cgroup_helpers.h"
-#include "testing_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
@@ -1608,26 +1594,19 @@ static int run_tests(int cgfd)
return fails ? -1 : 0;
}
-int main(int argc, char **argv)
+void test_sysctl(void)
{
- int cgfd = -1;
- int err = 0;
+ int cgfd;
cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
+ if (!ASSERT_OK_FD(cgfd, "create_cgroup"))
+ goto out;
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ if (!ASSERT_OK(run_tests(cgfd), "run_tests"))
+ goto out;
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
out:
close(cgfd);
cleanup_cgroup_environment();
- return err;
+ return;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
new file mode 100644
index 000000000000..9fd6306b455c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#define TLD_FREE_DATA_ON_THREAD_EXIT
+#define TLD_DYN_DATA_SIZE 4096
+#include "task_local_data.h"
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+#include "test_task_local_data.skel.h"
+
+TLD_DEFINE_KEY(value0_key, "value0", sizeof(int));
+
+/*
+ * Reset task local data between subtests by clearing metadata other
+ * than the statically defined value0. This is safe as subtests run
+ * sequentially. Users of the task local data library should not touch
+ * library internals.
+ */
+static void reset_tld(void)
+{
+ if (TLD_READ_ONCE(tld_meta_p)) {
+ /* Remove TLDs created by tld_create_key() */
+ tld_meta_p->cnt = 1;
+ tld_meta_p->size = TLD_DYN_DATA_SIZE;
+ memset(&tld_meta_p->metadata[1], 0,
+ (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata));
+ }
+}
+
+/* Serialize access to bpf program's global variables */
+static pthread_mutex_t global_mutex;
+
+static tld_key_t *tld_keys;
+
+#define TEST_BASIC_THREAD_NUM 32
+
+void *test_task_local_data_basic_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct test_task_local_data *skel = (struct test_task_local_data *)arg;
+ int fd, err, tid, *value0, *value1;
+ struct test_tld_struct *value2;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ value0 = tld_get_data(fd, value0_key);
+ if (!ASSERT_OK_PTR(value0, "tld_get_data"))
+ goto out;
+
+ value1 = tld_get_data(fd, tld_keys[1]);
+ if (!ASSERT_OK_PTR(value1, "tld_get_data"))
+ goto out;
+
+ value2 = tld_get_data(fd, tld_keys[2]);
+ if (!ASSERT_OK_PTR(value2, "tld_get_data"))
+ goto out;
+
+ tid = sys_gettid();
+
+ *value0 = tid + 0;
+ *value1 = tid + 1;
+ value2->a = tid + 2;
+ value2->b = tid + 3;
+ value2->c = tid + 4;
+ value2->d = tid + 5;
+
+ pthread_mutex_lock(&global_mutex);
+ /* Run task_main that read task local data and save to global variables */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+ /* Make sure valueX are indeed local to threads */
+ ASSERT_EQ(*value0, tid + 0, "value0");
+ ASSERT_EQ(*value1, tid + 1, "value1");
+ ASSERT_EQ(value2->a, tid + 2, "value2.a");
+ ASSERT_EQ(value2->b, tid + 3, "value2.b");
+ ASSERT_EQ(value2->c, tid + 4, "value2.c");
+ ASSERT_EQ(value2->d, tid + 5, "value2.d");
+
+ *value0 = tid + 5;
+ *value1 = tid + 4;
+ value2->a = tid + 3;
+ value2->b = tid + 2;
+ value2->c = tid + 1;
+ value2->d = tid + 0;
+
+ /* Run task_main again */
+ pthread_mutex_lock(&global_mutex);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+
+ ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0");
+ ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1");
+ ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a");
+ ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b");
+ ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c");
+ ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d");
+ pthread_mutex_unlock(&global_mutex);
+
+out:
+ pthread_exit(NULL);
+}
+
+static void test_task_local_data_basic(void)
+{
+ struct test_task_local_data *skel;
+ pthread_t thread[TEST_BASIC_THREAD_NUM];
+ char dummy_key_name[TLD_NAME_LEN];
+ tld_key_t key;
+ int i, err;
+
+ reset_tld();
+
+ ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init");
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[1] = tld_create_key("value1", sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key");
+ tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key");
+
+ /*
+ * Shouldn't be able to store data exceeding a page. Create a TLD just big
+ * enough to exceed a page. TLDs already created are int value0, int
+ * value1, and struct test_tld_struct value2.
+ */
+ key = tld_create_key("value_not_exist",
+ TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1);
+ ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key");
+
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key");
+
+ /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */
+ for (i = 3; i < TLD_MAX_DATA_CNT; i++) {
+ snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i);
+ tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int));
+ ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key");
+ }
+ key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct));
+ ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key");
+
+ /* Access TLDs from multiple threads and check if they are thread-specific */
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) {
+ err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto out;
+ }
+
+out:
+ for (i = 0; i < TEST_BASIC_THREAD_NUM; i++)
+ pthread_join(thread[i], NULL);
+
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3)
+
+void *test_task_local_data_race_thread(void *arg)
+{
+ int err = 0, id = (intptr_t)arg;
+ char key_name[32];
+ tld_key_t key;
+
+ key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1);
+ if (tld_key_err_or_zero(key) != -E2BIG) {
+ err = 1;
+ goto out;
+ }
+
+ /* Only one thread will succeed in creating value1 */
+ key = tld_create_key("value1", sizeof(int));
+ if (!tld_key_is_err(key))
+ tld_keys[1] = key;
+
+ /* Only one thread will succeed in creating value2 */
+ key = tld_create_key("value2", sizeof(struct test_tld_struct));
+ if (!tld_key_is_err(key))
+ tld_keys[2] = key;
+
+ snprintf(key_name, 32, "thread_%d", id);
+ tld_keys[id] = tld_create_key(key_name, sizeof(int));
+ if (tld_key_is_err(tld_keys[id]))
+ err = 2;
+out:
+ return (void *)(intptr_t)err;
+}
+
+static void test_task_local_data_race(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ pthread_t thread[TEST_RACE_THREAD_NUM];
+ struct test_task_local_data *skel;
+ int fd, i, j, err, *data;
+ void *ret = NULL;
+
+ skel = test_task_local_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t));
+ if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys"))
+ goto out;
+
+ fd = bpf_map__fd(skel->maps.tld_data_map);
+
+ ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY");
+ tld_keys[0] = value0_key;
+
+ for (j = 0; j < 100; j++) {
+ reset_tld();
+
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ /*
+ * Try to make tld_create_key() calls race with each other: issue both
+ * valid and invalid calls from different threads.
+ */
+ err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread,
+ (void *)(intptr_t)(i + 3));
+ if (CHECK_FAIL(err))
+ break;
+ }
+
+ /* Wait for all tld_create_key() to return */
+ for (i = 0; i < TEST_RACE_THREAD_NUM; i++) {
+ pthread_join(thread[i], &ret);
+ if (CHECK_FAIL(ret))
+ break;
+ }
+
+ /* Write a unique number to each TLD */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(!data))
+ break;
+ *data = i;
+ }
+
+ /* Read the TLDs back and check the values to see if any addresses collide */
+ for (i = 0; i < TLD_MAX_DATA_CNT; i++) {
+ data = tld_get_data(fd, tld_keys[i]);
+ if (CHECK_FAIL(*data != i))
+ break;
+ }
+
+ /* Run task_main to make sure no invalid TLDs are added */
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts);
+ ASSERT_OK(err, "run task_main");
+ ASSERT_OK(opts.retval, "task_main retval");
+ }
+out:
+ if (tld_keys) {
+ free(tld_keys);
+ tld_keys = NULL;
+ }
+ tld_free();
+ test_task_local_data__destroy(skel);
+}
+
+void test_task_local_data(void)
+{
+ if (test__start_subtest("task_local_data_basic"))
+ test_task_local_data_basic();
+ if (test__start_subtest("task_local_data_race"))
+ test_task_local_data_race();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_work.c b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
new file mode 100644
index 000000000000..774b31a5f6ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_task_work.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+#include "task_work.skel.h"
+#include "task_work_fail.skel.h"
+#include <linux/bpf.h>
+#include <linux/perf_event.h>
+#include <sys/syscall.h>
+#include <time.h>
+
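+/*
+ * Open a sampling perf event (sample_period = 100000) of the given type and
+ * config for @pid on any CPU; the returned fd is later used to attach the
+ * BPF program.
+ */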
+static int perf_event_open(__u32 type, __u64 config, int pid)
+{
+ struct perf_event_attr attr = {
+ .type = type,
+ .config = config,
+ .size = sizeof(struct perf_event_attr),
+ .sample_period = 100000,
+ };
+
+ return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
+}
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
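+/*
+ * Walk all entries of @map and check that every populated element holds
+ * @expected_data. Returns non-zero if no entry was populated or on mismatch.
+ */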
+static int verify_map(struct bpf_map *map, const char *expected_data)
+{
+ int err;
+ struct elem value;
+ int processed_values = 0;
+ int k, sz;
+
+ sz = bpf_map__max_entries(map);
+ for (k = 0; k < sz; ++k) {
+ err = bpf_map__lookup_elem(map, &k, sizeof(int), &value, sizeof(struct elem), 0);
+ if (err)
+ continue;
+ if (!ASSERT_EQ(strcmp(expected_data, value.data), 0, "map data")) {
+ fprintf(stderr, "expected '%s', found '%s' in %s map", expected_data,
+ value.data, bpf_map__name(map));
+ return 2;
+ }
+ processed_values++;
+ }
+
+ return processed_values == 0;
+}
+
+static void task_work_run(const char *prog_name, const char *map_name)
+{
+ struct task_work *skel;
+ struct bpf_program *prog;
+ struct bpf_map *map;
+ struct bpf_link *link = NULL;
+ int err, pe_fd = -1, pid, status, pipefd[2];
+ char user_string[] = "hello world";
+
+ if (!ASSERT_NEQ(pipe(pipefd), -1, "pipe"))
+ return;
+
+ pid = fork();
+ if (pid == 0) {
+ __u64 num = 1;
+ int i;
+ char buf;
+
+ close(pipefd[1]);
+ read(pipefd[0], &buf, sizeof(buf));
+ close(pipefd[0]);
+
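+ /* Burn CPU cycles so the sampling perf event keeps firing */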
+ for (i = 0; i < 10000; ++i)
+ num *= time(0) % 7;
+ (void)num;
+ exit(0);
+ }
+ if (!ASSERT_GT(pid, 0, "fork() failed")) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return;
+ }
+
+ skel = task_work__open();
+ if (!ASSERT_OK_PTR(skel, "task_work__open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ bpf_program__set_autoload(prog, false);
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "prog_name"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, true);
+ skel->bss->user_ptr = (char *)user_string;
+
+ err = task_work__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pe_fd = perf_event_open(PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, pid);
+ if (pe_fd == -1 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_NEQ(pe_fd, -1, "pe_fd")) {
+ fprintf(stderr, "perf_event_open errno: %d, pid: %d\n", errno, pid);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(prog, pe_fd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ /* perf event fd ownership is passed to bpf_link */
+ pe_fd = -1;
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ /* Wait to collect some samples */
+ waitpid(pid, &status, 0);
+ pid = 0;
+ map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(map, "find map_name"))
+ goto cleanup;
+ if (!ASSERT_OK(verify_map(map, user_string), "verify map"))
+ goto cleanup;
+cleanup:
+ if (pe_fd >= 0)
+ close(pe_fd);
+ bpf_link__destroy(link);
+ task_work__destroy(skel);
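+ /* If the child was never unblocked due to an early failure, release and reap it now */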
+ if (pid > 0) {
+ close(pipefd[0]);
+ write(pipefd[1], user_string, 1);
+ close(pipefd[1]);
+ waitpid(pid, &status, 0);
+ }
+}
+
+void test_task_work(void)
+{
+ if (test__start_subtest("test_task_work_hash_map"))
+ task_work_run("oncpu_hash_map", "hmap");
+
+ if (test__start_subtest("test_task_work_array_map"))
+ task_work_run("oncpu_array_map", "arrmap");
+
+ if (test__start_subtest("test_task_work_lru_map"))
+ task_work_run("oncpu_lru_map", "lrumap");
+
+ RUN_TESTS(task_work_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
new file mode 100644
index 000000000000..462512fb191f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_edt.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * BPF-based flow shaping
+ *
+ * The test brings up a veth pair across two isolated namespaces, attaches a
+ * flow-shaping program to the server-side veth, and ensures that the measured
+ * transfer rate matches the rate configured in the BPF shaper.
+ */
+
+#include <asm-generic/socket.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <math.h>
+#include <sys/time.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_edt.skel.h"
+
+#define SERVER_NS "tc-edt-server-ns"
+#define CLIENT_NS "tc-edt-client-ns"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP4_ADDR_VETH2_HEX 0xC0A80102
+
+#define TIMEOUT_MS 2000
+#define TEST_PORT 9000
+#define TARGET_RATE_MBPS 5.0
+#define TX_BYTES_COUNT (1 * 1000 * 1000)
+#define RATE_ERROR_PERCENT 2.0
+
+struct connection {
+ int server_listen_fd;
+ int server_conn_fd;
+ int client_conn_fd;
+};
+
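+/*
+ * Create the client and server namespaces, connect them with a veth pair, and
+ * attach the shaper program to veth2 egress behind an fq qdisc.
+ */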
+static int setup(struct test_tc_edt *skel)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int ret;
+
+ if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
+ goto fail;
+ if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+ goto fail_delete_client_ns;
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail_delete_server_ns;
+ SYS(fail_close_client_ns, "ip link add veth1 type veth peer name %s",
+ "veth2 netns " SERVER_NS);
+ SYS(fail_close_client_ns, "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+ SYS(fail_close_client_ns, "ip link set veth1 up");
+
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "enter server ns"))
+ goto fail_close_client_ns;
+ SYS(fail_close_server_ns, "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+ SYS(fail_close_server_ns, "ip link set veth2 up");
+ SYS(fail_close_server_ns, "tc qdisc add dev veth2 root fq");
+ ret = tc_prog_attach("veth2", -1, bpf_program__fd(skel->progs.tc_prog));
+ if (!ASSERT_OK(ret, "attach bpf prog"))
+ goto fail_close_server_ns;
+ skel->bss->target_rate = TARGET_RATE_MBPS * 1000 * 1000;
+ close_netns(nstoken_server);
+ close_netns(nstoken_client);
+
+ return 0;
+
+fail_close_server_ns:
+ close_netns(nstoken_server);
+fail_close_client_ns:
+ close_netns(nstoken_client);
+fail_delete_server_ns:
+ remove_netns(SERVER_NS);
+fail_delete_client_ns:
+ remove_netns(CLIENT_NS);
+fail:
+ return -1;
+}
+
+static void cleanup(void)
+{
+ remove_netns(CLIENT_NS);
+ remove_netns(SERVER_NS);
+}
+
+static void run_test(void)
+{
+ int server_fd, client_fd, err;
+ double rate_mbps, rate_error;
+ struct nstoken *nstoken;
+ __u64 ts_start, ts_end;
+
+ nstoken = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, IP4_ADDR_VETH2,
+ TEST_PORT, TIMEOUT_MS);
+ if (!ASSERT_OK_FD(server_fd, "start server"))
+ return;
+
+ close_netns(nstoken);
+ nstoken = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+ return;
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_OK_FD(client_fd, "connect client"))
+ return;
+
+ ts_start = get_time_ns();
+ err = send_recv_data(server_fd, client_fd, TX_BYTES_COUNT);
+ ts_end = get_time_ns();
+ close_netns(nstoken);
+ ASSERT_OK(err, "send_recv_data");
+
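+ /* Compare the measured transfer rate against the shaper target, allowing RATE_ERROR_PERCENT of deviation */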
+ rate_mbps = TX_BYTES_COUNT / ((ts_end - ts_start) / 1000.0);
+ rate_error =
+ fabs((rate_mbps - TARGET_RATE_MBPS) * 100.0 / TARGET_RATE_MBPS);
+
+ ASSERT_LE(rate_error, RATE_ERROR_PERCENT,
+ "rate error is lower than threshold");
+}
+
+void test_tc_edt(void)
+{
+ struct test_tc_edt *skel;
+
+ skel = test_tc_edt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open and load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "global setup"))
+ return;
+
+ run_test();
+
+ cleanup();
+ test_tc_edt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
new file mode 100644
index 000000000000..0fe0a8f62486
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tc_tunnel.c
@@ -0,0 +1,714 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ * This file tests the BPF network tunnel implementations. For each tunnel
+ * type, the test validates that:
+ * - basic communication can first be established between the two veths
+ * - after attaching a BPF-based encapsulation program on client egress, the
+ * client can no longer communicate with the server
+ * - after adding kernel-based decapsulation on server ingress, the client
+ * can connect again
+ * - after replacing the kernel-based decapsulation with a BPF-based one,
+ * the client can still connect
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tc_tunnel.skel.h"
+
+#define SERVER_NS "tc-tunnel-server-ns"
+#define CLIENT_NS "tc-tunnel-client-ns"
+#define MAC_ADDR_VETH1 "00:11:22:33:44:55"
+#define IP4_ADDR_VETH1 "192.168.1.1"
+#define IP6_ADDR_VETH1 "fd::1"
+#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB"
+#define IP4_ADDR_VETH2 "192.168.1.2"
+#define IP6_ADDR_VETH2 "fd::2"
+
+#define TEST_NAME_MAX_LEN 64
+#define PROG_NAME_MAX_LEN 64
+#define TUNNEL_ARGS_MAX_LEN 128
+#define BUFFER_LEN 2000
+#define DEFAULT_TEST_DATA_SIZE 100
+#define GSO_TEST_DATA_SIZE BUFFER_LEN
+
+#define TIMEOUT_MS 1000
+#define TEST_PORT 8000
+#define UDP_PORT 5555
+#define MPLS_UDP_PORT 6635
+#define FOU_MPLS_PROTO 137
+#define VXLAN_ID 1
+#define VXLAN_PORT 8472
+#define MPLS_TABLE_ENTRIES_COUNT 65536
+
+static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN];
+
+struct subtest_cfg {
+ char *ebpf_tun_type;
+ char *iproute_tun_type;
+ char *mac_tun_type;
+ int ipproto;
+ void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst);
+ bool tunnel_need_veth_mac;
+ bool configure_fou_rx_port;
+ char *tmode;
+ bool expect_kern_decap_failure;
+ bool configure_mpls;
+ bool test_gso;
+ char *tunnel_client_addr;
+ char *tunnel_server_addr;
+ char name[TEST_NAME_MAX_LEN];
+ char *server_addr;
+ int client_egress_prog_fd;
+ int server_ingress_prog_fd;
+ char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN];
+ int server_fd;
+};
+
+struct connection {
+ int client_fd;
+ int server_fd;
+};
+
+static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size)
+{
+ int ret;
+
+ ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type,
+ cfg->mac_tun_type);
+
+ return ret < 0 ? ret : 0;
+}
+
+static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel)
+{
+ char prog_name[PROG_NAME_MAX_LEN];
+ struct bpf_program *prog;
+ int ret;
+
+ ret = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_");
+ if (ret < 0)
+ return ret;
+ ret = build_subtest_name(cfg, prog_name + ret, PROG_NAME_MAX_LEN - ret);
+ if (ret < 0)
+ return ret;
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!prog)
+ return -1;
+
+ cfg->client_egress_prog_fd = bpf_program__fd(prog);
+ cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f);
+ return 0;
+}
+
+static void set_subtest_addresses(struct subtest_cfg *cfg)
+{
+ if (cfg->ipproto == 6)
+ cfg->server_addr = IP6_ADDR_VETH2;
+ else
+ cfg->server_addr = IP4_ADDR_VETH2;
+
+ /* Some specific tunnel types need specific addressing, it then
+ * has been already set in the configuration table. Otherwise,
+ * deduce the relevant addressing from the ipproto
+ */
+ if (cfg->tunnel_client_addr && cfg->tunnel_server_addr)
+ return;
+
+ if (cfg->ipproto == 6) {
+ cfg->tunnel_client_addr = IP6_ADDR_VETH1;
+ cfg->tunnel_server_addr = IP6_ADDR_VETH2;
+ } else {
+ cfg->tunnel_client_addr = IP4_ADDR_VETH1;
+ cfg->tunnel_server_addr = IP4_ADDR_VETH2;
+ }
+}
+
+static int run_server(struct subtest_cfg *cfg)
+{
+ int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+ struct nstoken *nstoken;
+ struct network_helper_opts opts = {
+ .timeout_ms = TIMEOUT_MS
+ };
+
+ nstoken = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return -1;
+
+ cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr,
+ TEST_PORT, &opts);
+ close_netns(nstoken);
+ if (!ASSERT_OK_FD(cfg->server_fd, "start server"))
+ return -1;
+
+ return 0;
+}
+
+static int check_server_rx_data(struct subtest_cfg *cfg,
+ struct connection *conn, int len)
+{
+ int err;
+
+ memset(rx_buffer, 0, BUFFER_LEN);
+ err = recv(conn->server_fd, rx_buffer, len, 0);
+ if (!ASSERT_EQ(err, len, "check rx data len"))
+ return 1;
+ if (!ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data"))
+ return 1;
+ return 0;
+}
+
+static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
+{
+ struct network_helper_opts opts = {.timeout_ms = 500};
+ int family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;
+ struct connection *conn = NULL;
+ int client_fd, server_fd;
+
+ conn = malloc(sizeof(struct connection));
+ if (!conn)
+ return conn;
+
+ client_fd = connect_to_addr_str(family, SOCK_STREAM, cfg->server_addr,
+ TEST_PORT, &opts);
+
+ if (client_fd < 0) {
+ free(conn);
+ return NULL;
+ }
+
+ server_fd = accept(cfg->server_fd, NULL, NULL);
+ if (server_fd < 0) {
+ close(client_fd);
+ free(conn);
+ return NULL;
+ }
+
+ conn->server_fd = server_fd;
+ conn->client_fd = client_fd;
+
+ return conn;
+}
+
+static void disconnect_client_from_server(struct subtest_cfg *cfg,
+ struct connection *conn)
+{
+ close(conn->server_fd);
+ close(conn->client_fd);
+ free(conn);
+}
+
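+/*
+ * Connect from the client namespace and exchange data with the server. When
+ * @must_succeed is false, the connection attempt is expected to fail (e.g.
+ * encapsulation is configured on egress but nothing decapsulates on ingress).
+ */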
+static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
+{
+ struct connection *conn;
+ int err, res = -1;
+
+ conn = connect_client_to_server(cfg);
+ if (!must_succeed && !ASSERT_ERR_PTR(conn, "connection that must fail"))
+ goto end;
+ else if (!must_succeed)
+ return 0;
+
+ if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
+ return -1;
+
+ err = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
+ if (!ASSERT_EQ(err, DEFAULT_TEST_DATA_SIZE, "send data from client"))
+ goto end;
+ if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+ goto end;
+
+ if (!cfg->test_gso) {
+ res = 0;
+ goto end;
+ }
+
+ err = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0);
+ if (!ASSERT_EQ(err, GSO_TEST_DATA_SIZE, "send (large) data from client"))
+ goto end;
+ if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
+ goto end;
+
+ res = 0;
+end:
+ disconnect_client_from_server(cfg, conn);
+ return res;
+}
+
+static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+ snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx",
+ VXLAN_ID, VXLAN_PORT);
+}
+
+static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
+{
+ bool is_mpls = !strcmp(cfg->mac_tun_type, "mpls");
+
+ snprintf(dst, TUNNEL_ARGS_MAX_LEN,
+ "encap fou encap-sport auto encap-dport %d",
+ is_mpls ? MPLS_UDP_PORT : UDP_PORT);
+}
+
+static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add)
+{
+ bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0;
+ int fou_proto;
+
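+ /* FOU inner protocol: MPLS-in-IP (137), IPv6-in-IP (41) or IPv4-in-IP (4) */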
+ if (is_mpls)
+ fou_proto = FOU_MPLS_PROTO;
+ else
+ fou_proto = cfg->ipproto == 6 ? 41 : 4;
+
+ SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del",
+ is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto,
+ cfg->ipproto == 6 ? " -6" : "");
+
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_fou_rx_port(struct subtest_cfg *cfg)
+{
+ return configure_fou_rx_port(cfg, true);
+}
+
+static int del_fou_rx_port(struct subtest_cfg *cfg)
+{
+ return configure_fou_rx_port(cfg, false);
+}
+
+static int update_tunnel_intf_addr(struct subtest_cfg *cfg)
+{
+ SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2);
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_kernel_for_mpls(struct subtest_cfg *cfg)
+{
+ SYS(fail, "sysctl -qw net.mpls.platform_labels=%d",
+ MPLS_TABLE_ENTRIES_COUNT);
+ SYS(fail, "ip -f mpls route add 1000 dev lo");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1");
+ SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0");
+ return 0;
+fail:
+ return -1;
+}
+
+static int configure_encapsulation(struct subtest_cfg *cfg)
+{
+ int ret;
+
+ ret = tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd);
+
+ return ret;
+}
+
+static int configure_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken = open_netns(SERVER_NS);
+ int ret = -1;
+
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return ret;
+
+ if (cfg->configure_fou_rx_port &&
+ !ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port"))
+ goto fail;
+ SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s",
+ cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "",
+ cfg->tunnel_client_addr, cfg->tunnel_server_addr,
+ cfg->extra_decap_mod_args);
+ if (cfg->tunnel_need_veth_mac &&
+ !ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac"))
+ goto fail;
+ if (cfg->configure_mpls &&
+ (!ASSERT_OK(configure_kernel_for_mpls(cfg),
+ "configure MPLS decap")))
+ goto fail;
+ SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0");
+ SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0");
+ SYS(fail, "ip link set dev testtun0 up");
+
+ ret = 0;
+fail:
+ close_netns(nstoken);
+ return ret;
+}
+
+static void remove_kernel_decapsulation(struct subtest_cfg *cfg)
+{
+ SYS_NOFAIL("ip link del testtun0");
+ if (cfg->configure_mpls)
+ SYS_NOFAIL("ip -f mpls route del 1000 dev lo");
+ if (cfg->configure_fou_rx_port)
+ del_fou_rx_port(cfg);
+}
+
+static int configure_ebpf_decapsulation(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken = open_netns(SERVER_NS);
+ int ret = -1;
+
+ if (!ASSERT_OK_PTR(nstoken, "open server ns"))
+ return ret;
+
+ if (!cfg->expect_kern_decap_failure)
+ SYS(fail, "ip link del testtun0");
+
+ if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1),
+ "attach_program"))
+ goto fail;
+
+ ret = 0;
+fail:
+ close_netns(nstoken);
+ return ret;
+}
+
+static void run_test(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken;
+
+ if (!ASSERT_OK(run_server(cfg), "run server"))
+ return;
+
+ nstoken = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open client ns"))
+ goto fail;
+
+ /* Basic communication must work */
+ if (!ASSERT_OK(send_and_test_data(cfg, true), "connect without any encap"))
+ goto fail;
+
+ /* Attach encapsulation program to client */
+ if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation"))
+ goto fail;
+
+ /* If supported, insert the kernel decap module; the connection must then succeed */
+ if (!cfg->expect_kern_decap_failure) {
+ if (!ASSERT_OK(configure_kernel_decapsulation(cfg),
+ "configure kernel decapsulation"))
+ goto fail;
+ if (!ASSERT_OK(send_and_test_data(cfg, true),
+ "connect with encap prog and kern decap"))
+ goto fail;
+ }
+
+ /* Replace the kernel decapsulation with BPF decapsulation; the test must still pass */
+ if (!ASSERT_OK(configure_ebpf_decapsulation(cfg), "configure ebpf decapsulation"))
+ goto fail;
+ ASSERT_OK(send_and_test_data(cfg, true), "connect with encap and decap progs");
+
+fail:
+ close_netns(nstoken);
+ close(cfg->server_fd);
+}
+
+static int setup(void)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int fd, err;
+
+ fd = open("/dev/urandom", O_RDONLY);
+ if (!ASSERT_OK_FD(fd, "open urandom"))
+ goto fail;
+ err = read(fd, tx_buffer, BUFFER_LEN);
+ close(fd);
+
+ if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
+ goto fail;
+
+ /* Configure the testing network */
+ if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns") ||
+ !ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
+ goto fail;
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail_delete_ns;
+ SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
+ "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
+ "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
+ SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
+ SYS(fail_close_ns_client, "ip link set veth1 up");
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+ goto fail_close_ns_client;
+ SYS(fail_close_ns_server, "ip link set veth2 up");
+
+ close_netns(nstoken_server);
+ close_netns(nstoken_client);
+ return 0;
+
+fail_close_ns_server:
+ close_netns(nstoken_server);
+fail_close_ns_client:
+ close_netns(nstoken_client);
+fail_delete_ns:
+ SYS_NOFAIL("ip netns del " CLIENT_NS);
+ SYS_NOFAIL("ip netns del " SERVER_NS);
+fail:
+ return -1;
+}
+
+static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken_client, *nstoken_server;
+ int ret = -1;
+
+ set_subtest_addresses(cfg);
+ if (!ASSERT_OK(set_subtest_progs(cfg, skel),
+ "find subtest progs"))
+ goto fail;
+ if (cfg->extra_decap_mod_args_cb)
+ cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
+
+ nstoken_client = open_netns(CLIENT_NS);
+ if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
+ goto fail;
+ SYS(fail_close_client_ns,
+ "ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
+ SYS(fail_close_client_ns, "ip -4 route flush table main");
+ SYS(fail_close_client_ns,
+ "ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
+ SYS(fail_close_client_ns,
+ "ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
+ SYS(fail_close_client_ns, "ip -6 route flush table main");
+ SYS(fail_close_client_ns,
+ "ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
+ nstoken_server = open_netns(SERVER_NS);
+ if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
+ goto fail_close_client_ns;
+ SYS(fail_close_server_ns,
+ "ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
+ SYS(fail_close_server_ns,
+ "ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
+
+ ret = 0;
+
+fail_close_server_ns:
+ close_netns(nstoken_server);
+fail_close_client_ns:
+ close_netns(nstoken_client);
+fail:
+ return ret;
+}
+
+
+static void subtest_cleanup(struct subtest_cfg *cfg)
+{
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(CLIENT_NS);
+ if (ASSERT_OK_PTR(nstoken, "open client ns")) {
+ SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
+ SYS_NOFAIL("ip a flush veth1");
+ close_netns(nstoken);
+ }
+ nstoken = open_netns(SERVER_NS);
+ if (ASSERT_OK_PTR(nstoken, "open server ns")) {
+ SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
+ SYS_NOFAIL("ip a flush veth2");
+ if (!cfg->expect_kern_decap_failure)
+ remove_kernel_decapsulation(cfg);
+ close_netns(nstoken);
+ }
+}
+
+static void cleanup(void)
+{
+ remove_netns(CLIENT_NS);
+ remove_netns(SERVER_NS);
+}
+
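+/* One subtest per (ebpf_tun_type, mac_tun_type) combination; the subtest name is built from both */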
+static struct subtest_cfg subtests_cfg[] = {
+ {
+ .ebpf_tun_type = "ipip",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ },
+ {
+ .ebpf_tun_type = "ipip6",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 4,
+ .tunnel_client_addr = IP6_ADDR_VETH1,
+ .tunnel_server_addr = IP6_ADDR_VETH2,
+ },
+ {
+ .ebpf_tun_type = "ip6tnl",
+ .iproute_tun_type = "ip6tnl",
+ .mac_tun_type = "none",
+ .ipproto = 6,
+ },
+ {
+ .mac_tun_type = "none",
+ .ebpf_tun_type = "sit",
+ .iproute_tun_type = "sit",
+ .ipproto = 6,
+ .tunnel_client_addr = IP4_ADDR_VETH1,
+ .tunnel_server_addr = IP4_ADDR_VETH2,
+ },
+ {
+ .ebpf_tun_type = "vxlan",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "vxlan",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+ .tunnel_need_veth_mac = true
+ },
+ {
+ .ebpf_tun_type = "ip6vxlan",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "vxlan",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
+ .tunnel_need_veth_mac = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "gre",
+ .ipproto = 4,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "gretap",
+ .ipproto = 4,
+ .tunnel_need_veth_mac = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "gre",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "gre",
+ .ipproto = 4,
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6gre",
+ .ipproto = 6,
+ .test_gso = true,
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ip6gretap",
+ .ipproto = 6,
+ .tunnel_need_veth_mac = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6gre",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ip6gre",
+ .ipproto = 6,
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "udp",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ipip",
+ .ipproto = 4,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .tmode = "mode any ttl 255",
+ .configure_mpls = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "none",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "eth",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+ {
+ .ebpf_tun_type = "ip6udp",
+ .mac_tun_type = "mpls",
+ .iproute_tun_type = "ip6tnl",
+ .ipproto = 6,
+ .extra_decap_mod_args_cb = udp_decap_mod_args_cb,
+ .configure_fou_rx_port = true,
+ .tmode = "mode any ttl 255",
+ .expect_kern_decap_failure = true,
+ .test_gso = true
+ },
+};
+
+void test_tc_tunnel(void)
+{
+ struct test_tc_tunnel *skel;
+ struct subtest_cfg *cfg;
+ int i, ret;
+
+ skel = test_tc_tunnel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open and load"))
+ return;
+
+ if (!ASSERT_OK(setup(), "global setup"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
+ cfg = &subtests_cfg[i];
+ ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN);
+ if (ret < 0 || !test__start_subtest(cfg->name))
+ continue;
+ if (subtest_setup(skel, cfg) == 0)
+ run_test(cfg);
+ subtest_cleanup(cfg);
+ }
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index cec746e77cd3..eb9309931272 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -71,6 +71,8 @@
#define IP4_ADDR2_VETH1 "172.16.1.20"
#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+#define IP6_ADDR_TUNL_DEV0 "fc80::100"
+#define IP6_ADDR_TUNL_DEV1 "fc80::200"
#define IP6_ADDR_VETH0 "::11"
#define IP6_ADDR1_VETH1 "::22"
@@ -98,6 +100,27 @@
#define XFRM_SPI_IN_TO_OUT 0x1
#define XFRM_SPI_OUT_TO_IN 0x2
+#define GRE_TUNL_DEV0 "gre00"
+#define GRE_TUNL_DEV1 "gre11"
+
+#define IP6GRE_TUNL_DEV0 "ip6gre00"
+#define IP6GRE_TUNL_DEV1 "ip6gre11"
+
+#define ERSPAN_TUNL_DEV0 "erspan00"
+#define ERSPAN_TUNL_DEV1 "erspan11"
+
+#define IP6ERSPAN_TUNL_DEV0 "ip6erspan00"
+#define IP6ERSPAN_TUNL_DEV1 "ip6erspan11"
+
+#define GENEVE_TUNL_DEV0 "geneve00"
+#define GENEVE_TUNL_DEV1 "geneve11"
+
+#define IP6GENEVE_TUNL_DEV0 "ip6geneve00"
+#define IP6GENEVE_TUNL_DEV1 "ip6geneve11"
+
+#define IP6TNL_TUNL_DEV0 "ip6tnl00"
+#define IP6TNL_TUNL_DEV1 "ip6tnl11"
+
#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
static int config_device(void)
@@ -216,6 +239,18 @@ fail:
return -1;
}
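+/* Bring up both tunnel endpoints and assign each an IPv4 address */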
+static int set_ipv4_addr(const char *dev0, const char *dev1)
+{
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip link set dev %s up", dev1);
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return 1;
+}
+
static int add_ipip_tunnel(enum ipip_encap encap)
{
int err;
@@ -356,6 +391,99 @@ static void delete_xfrm_tunnel(void)
IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
}
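+/*
+ * Create a tunnel device pair: a fully configured device (dev0) in at_ns0 and
+ * an "external" (collect_md) device (dev1) in the root namespace for the BPF
+ * programs to drive.
+ */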
+static int add_ipv4_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static void delete_tunnel(const char *dev0, const char *dev1)
+{
+ if (!dev0 || !dev1)
+ return;
+
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s", dev0);
+ SYS_NOFAIL("ip link delete dev %s", dev1);
+}
+
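+/* Assign IPv4 and IPv6 addresses to the veths and both tunnel endpoints, then bring them up */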
+static int set_ipv6_addr(const char *dev0, const char *dev1)
+{
+ /* disable IPv6 DAD because it might take too long and fail tests */
+ SYS(fail, "ip -n at_ns0 addr add %s/96 dev veth0 nodad", IP6_ADDR_VETH0);
+ SYS(fail, "ip -n at_ns0 link set dev veth0 up");
+ SYS(fail, "ip addr add %s/96 dev veth1 nodad", IP6_ADDR1_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24", dev0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/96 nodad", dev0, IP6_ADDR_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 link set dev %s up", dev0);
+
+ SYS(fail, "ip addr add dev %s %s/24", dev1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/96 nodad", dev1, IP6_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", dev1);
+ return 0;
+fail:
+ return 1;
+}
+
+static int add_ipv6_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s %s local %s remote %s",
+ dev0, type, opt, IP6_ADDR_VETH0, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s external", dev1, type);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 2 %s remote %s",
+ dev0, type, opt, IP4_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv4_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
+static int add_ip6geneve_tunnel(const char *dev0, const char *dev1,
+ const char *type, const char *opt)
+{
+ if (!type || !opt || !dev0 || !dev1)
+ return -1;
+
+ SYS(fail, "ip -n at_ns0 link add dev %s type %s id 22 %s remote %s",
+ dev0, type, opt, IP6_ADDR1_VETH1);
+
+ SYS(fail, "ip link add dev %s type %s %s external", dev1, type, opt);
+
+ return set_ipv6_addr(dev0, dev1);
+fail:
+ return -1;
+}
+
static int test_ping(int family, const char *addr)
{
SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
@@ -364,37 +492,46 @@ fail:
return -1;
}
-static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+static void ping_dev0(void)
{
- DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
- .priority = 1, .prog_fd = igr_fd);
- DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
- .priority = 1, .prog_fd = egr_fd);
- int ret;
+ /* ping from root namespace test */
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+}
- ret = bpf_tc_hook_create(hook);
- if (!ASSERT_OK(ret, "create tc hook"))
- return ret;
+static void ping_dev1(void)
+{
+ struct nstoken *nstoken;
- if (igr_fd >= 0) {
- hook->attach_point = BPF_TC_INGRESS;
- ret = bpf_tc_attach(hook, &opts1);
- if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
- return ret;
- }
- }
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
- if (egr_fd >= 0) {
- hook->attach_point = BPF_TC_EGRESS;
- ret = bpf_tc_attach(hook, &opts2);
- if (!ASSERT_OK(ret, "bpf_tc_attach")) {
- bpf_tc_hook_destroy(hook);
- return ret;
- }
- }
+ test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+}
- return 0;
+static void ping6_veth0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_VETH0);
+}
+
+static void ping6_dev0(void)
+{
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV0);
+}
+
+static void ping6_dev1(void)
+{
+ struct nstoken *nstoken;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ return;
+
+ test_ping(AF_INET6, IP6_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
}
static void test_vxlan_tunnel(void)
@@ -404,11 +541,9 @@ static void test_vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_vxlan_tunnel();
@@ -419,42 +554,22 @@ static void test_vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (tc_prog_attach(VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach bpf prog to veth dev tc hook point */
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
+ if (tc_prog_attach("veth1", set_dst_prog_fd, -1))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (tc_prog_attach(VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -468,9 +583,7 @@ static void test_vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete vxlan tunnel */
@@ -488,11 +601,9 @@ static void test_ip6vxlan_tunnel(void)
int local_ip_map_fd = -1;
int set_src_prog_fd, get_src_prog_fd;
int set_dst_prog_fd;
- int key = 0, ifindex = -1;
+ int key = 0;
uint local_ip;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add vxlan tunnel */
err = add_ip6vxlan_tunnel();
@@ -503,31 +614,17 @@ static void test_ip6vxlan_tunnel(void)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ if (tc_prog_attach(IP6VXLAN_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
/* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
nstoken = open_netns("at_ns0");
if (!ASSERT_OK_PTR(nstoken, "setns src"))
goto done;
- ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
- if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
- if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ if (tc_prog_attach(IP6VXLAN_TUNL_DEV0, -1, set_dst_prog_fd))
goto done;
close_netns(nstoken);
@@ -541,9 +638,7 @@ static void test_ip6vxlan_tunnel(void)
goto done;
/* ping test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev0();
done:
/* delete ipv6 vxlan tunnel */
@@ -557,12 +652,8 @@ done:
static void test_ipip_tunnel(enum ipip_encap encap)
{
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int set_src_prog_fd, get_src_prog_fd;
- int ifindex = -1;
int err;
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
/* add ipip tunnel */
err = add_ipip_tunnel(encap);
@@ -573,10 +664,6 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel = test_tunnel_kern__open_and_load();
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex(IPIP_TUNL_DEV1);
- if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex"))
- goto done;
- tc_hook.ifindex = ifindex;
switch (encap) {
case FOU:
@@ -598,26 +685,11 @@ static void test_ipip_tunnel(enum ipip_encap encap)
skel->progs.ipip_set_tunnel);
}
- if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
- goto done;
-
- /* ping from root namespace test */
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
- if (!ASSERT_OK(err, "test_ping"))
+ if (tc_prog_attach(IPIP_TUNL_DEV1, get_src_prog_fd, set_src_prog_fd))
goto done;
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
- close_netns(nstoken);
+ ping_dev0();
+ ping_dev1();
done:
/* delete ipip tunnel */
@@ -628,11 +700,8 @@ done:
static void test_xfrm_tunnel(void)
{
- DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
- .attach_point = BPF_TC_INGRESS);
LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
struct test_tunnel_kern *skel = NULL;
- struct nstoken *nstoken;
int xdp_prog_fd;
int tc_prog_fd;
int ifindex;
@@ -646,19 +715,16 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
goto done;
- ifindex = if_nametoindex("veth1");
- if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
- goto done;
/* attach tc prog to tunnel dev */
- tc_hook.ifindex = ifindex;
tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
- if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
- goto done;
- if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
+ if (tc_prog_attach("veth1", tc_prog_fd, -1))
goto done;
/* attach xdp prog to tunnel dev */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
goto done;
@@ -666,14 +732,7 @@ static void test_xfrm_tunnel(void)
if (!ASSERT_OK(err, "bpf_xdp_attach"))
goto done;
- /* ping from at_ns0 namespace test */
- nstoken = open_netns("at_ns0");
- if (!ASSERT_OK_PTR(nstoken, "setns"))
- goto done;
- err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
- close_netns(nstoken);
- if (!ASSERT_OK(err, "test_ping"))
- goto done;
+ ping_dev1();
if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
goto done;
@@ -690,6 +749,281 @@ done:
test_tunnel_kern__destroy(skel);
}
+enum gre_test {
+ GRE,
+ GRE_NOKEY,
+ GRETAP,
+ GRETAP_NOKEY,
+};
+
+static void test_gre_tunnel(enum gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case GRE:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRE_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gre", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel_no_key);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ case GRETAP_NOKEY:
+ err = add_ipv4_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1, "gretap", "seq key 2");
+ set_fd = bpf_program__fd(skel->progs.gre_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.gre_get_tunnel);
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ if (tc_prog_attach(GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+
+done:
+ delete_tunnel(GRE_TUNL_DEV0, GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6gre_test {
+ IP6GRE,
+ IP6GRETAP
+};
+
+static void test_ip6gre_tunnel(enum ip6gre_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case IP6GRE:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gre", "flowlabel 0xbcdef key 2");
+ break;
+ case IP6GRETAP:
+ err = add_ipv6_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1,
+ "ip6gretap", "flowlabel 0xbcdef key 2");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6gretap_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6gretap_get_tunnel);
+ if (tc_prog_attach(IP6GRE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping6_dev1();
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GRE_TUNL_DEV0, IP6GRE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum erspan_test {
+ V1,
+ V2
+};
+
+static void test_erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv4_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1,
+ "erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 3");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.erspan_get_tunnel);
+ if (tc_prog_attach(ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(ERSPAN_TUNL_DEV0, ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6erspan_tunnel(enum erspan_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ switch (test) {
+ case V1:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan", "seq key 2 erspan_ver 1 erspan 123");
+ break;
+ case V2:
+ err = add_ipv6_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1,
+ "ip6erspan",
+ "seq key 2 erspan_ver 2 erspan_dir egress erspan_hwid 7");
+ break;
+ }
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip4ip6erspan_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip4ip6erspan_get_tunnel);
+ if (tc_prog_attach(IP6ERSPAN_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6ERSPAN_TUNL_DEV0, IP6ERSPAN_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_geneve_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1,
+ "geneve", "dstport 6081");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.geneve_get_tunnel);
+ if (tc_prog_attach(GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(GENEVE_TUNL_DEV0, GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6geneve_tunnel(void)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ip6geneve_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1,
+ "geneve", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ set_fd = bpf_program__fd(skel->progs.ip6geneve_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6geneve_get_tunnel);
+ if (tc_prog_attach(IP6GENEVE_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping_dev0();
+ ping_dev1();
+done:
+ delete_tunnel(IP6GENEVE_TUNL_DEV0, IP6GENEVE_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
+enum ip6tnl_test {
+ IPIP6,
+ IP6IP6
+};
+
+static void test_ip6tnl_tunnel(enum ip6tnl_test test)
+{
+ struct test_tunnel_kern *skel;
+ int set_fd, get_fd;
+ int err;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ return;
+
+ err = add_ipv6_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1, "ip6tnl", "");
+ if (!ASSERT_OK(err, "add tunnel"))
+ goto done;
+
+ switch (test) {
+ case IPIP6:
+ set_fd = bpf_program__fd(skel->progs.ipip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ipip6_get_tunnel);
+ break;
+ case IP6IP6:
+ set_fd = bpf_program__fd(skel->progs.ip6ip6_set_tunnel);
+ get_fd = bpf_program__fd(skel->progs.ip6ip6_get_tunnel);
+ break;
+ }
+ if (tc_prog_attach(IP6TNL_TUNL_DEV1, get_fd, set_fd))
+ goto done;
+
+ ping6_veth0();
+ switch (test) {
+ case IPIP6:
+ ping_dev0();
+ ping_dev1();
+ break;
+ case IP6IP6:
+ ping6_dev0();
+ ping6_dev1();
+ break;
+ }
+
+done:
+ delete_tunnel(IP6TNL_TUNL_DEV0, IP6TNL_TUNL_DEV1);
+ test_tunnel_kern__destroy(skel);
+}
+
#define RUN_TEST(name, ...) \
({ \
if (test__start_subtest(#name)) { \
@@ -707,6 +1041,20 @@ static void *test_tunnel_run_tests(void *arg)
RUN_TEST(ipip_tunnel, FOU);
RUN_TEST(ipip_tunnel, GUE);
RUN_TEST(xfrm_tunnel);
+ RUN_TEST(gre_tunnel, GRE);
+ RUN_TEST(gre_tunnel, GRE_NOKEY);
+ RUN_TEST(gre_tunnel, GRETAP);
+ RUN_TEST(gre_tunnel, GRETAP_NOKEY);
+ RUN_TEST(ip6gre_tunnel, IP6GRE);
+ RUN_TEST(ip6gre_tunnel, IP6GRETAP);
+ RUN_TEST(erspan_tunnel, V1);
+ RUN_TEST(erspan_tunnel, V2);
+ RUN_TEST(ip6erspan_tunnel, V1);
+ RUN_TEST(ip6erspan_tunnel, V2);
+ RUN_TEST(geneve_tunnel);
+ RUN_TEST(ip6geneve_tunnel);
+ RUN_TEST(ip6tnl_tunnel, IPIP6);
+ RUN_TEST(ip6tnl_tunnel, IP6IP6);
return NULL;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_veristat.c b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
new file mode 100644
index 000000000000..b38c16b4247f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_veristat.c
@@ -0,0 +1,261 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <string.h>
+#include <stdio.h>
+
+#define __CHECK_STR(str, name) \
+ do { \
+ if (!ASSERT_HAS_SUBSTR(fix->output, (str), (name))) \
+ goto out; \
+ } while (0)
+
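+/* Per-test scratch state: path to the veristat binary, a temp file capturing its output, and a read buffer */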
+struct fixture {
+ char tmpfile[80];
+ int fd;
+ char *output;
+ size_t sz;
+ char veristat[80];
+};
+
+static struct fixture *init_fixture(void)
+{
+ struct fixture *fix = malloc(sizeof(struct fixture));
+
+ /* for the no_alu32 and cpuv4 flavors, veristat is in the parent folder */
+ if (access("./veristat", F_OK) == 0)
+ strcpy(fix->veristat, "./veristat");
+ else if (access("../veristat", F_OK) == 0)
+ strcpy(fix->veristat, "../veristat");
+ else
+ PRINT_FAIL("Can't find veristat binary");
+
+ snprintf(fix->tmpfile, sizeof(fix->tmpfile), "/tmp/test_veristat.XXXXXX");
+ fix->fd = mkstemp(fix->tmpfile);
+ fix->sz = 1000000;
+ fix->output = malloc(fix->sz);
+ return fix;
+}
+
+static void teardown_fixture(struct fixture *fix)
+{
+ free(fix->output);
+ close(fix->fd);
+ remove(fix->tmpfile);
+ free(fix);
+}
+
+static void test_set_global_vars_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS(out,
+ "%s set_global_vars.bpf.o"\
+ " -G \"var_s64 = 0xf000000000000001\" "\
+ " -G \"var_u64 = 0xfedcba9876543210\" "\
+ " -G \"var_s32 = -0x80000000\" "\
+ " -G \"var_u32 = 0x76543210\" "\
+ " -G \"var_s16 = -32768\" "\
+ " -G \"var_u16 = 60652\" "\
+ " -G \"var_s8 = -128\" "\
+ " -G \"var_u8 = 255\" "\
+ " -G \"var_ea = EA2\" "\
+ " -G \"var_eb = EB2\" "\
+ " -G \"var_ec=EC2\" "\
+ " -G \"var_b = 1\" "\
+ " -G \"struct1[2].struct2[1][2].u.var_u8[2]=170\" "\
+ " -G \"union1.struct3.var_u8_l = 0xaa\" "\
+ " -G \"union1.struct3.var_u8_h = 0xaa\" "\
+ " -G \"arr[3]= 171\" " \
+ " -G \"arr[EA2] =172\" " \
+ " -G \"enum_arr[EC2]=EA3\" " \
+ " -G \"three_d[31][7][EA2]=173\"" \
+ " -G \"struct1[2].struct2[1][2].u.mat[5][3]=174\" " \
+ " -G \"struct11 [ 7 ] [ 5 ] .struct2[0][1].u.mat[3][0] = 175\" " \
+ " -vl2 > %s", fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("=0xf000000000000001 ", "var_s64 = 0xf000000000000001");
+ __CHECK_STR("=0xfedcba9876543210 ", "var_u64 = 0xfedcba9876543210");
+ __CHECK_STR("=0x80000000 ", "var_s32 = -0x80000000");
+ __CHECK_STR("=0x76543210 ", "var_u32 = 0x76543210");
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+ __CHECK_STR("=128 ", "var_s8 = -128");
+ __CHECK_STR("=255 ", "var_u8 = 255");
+ __CHECK_STR("=11 ", "var_ea = EA2");
+ __CHECK_STR("=12 ", "var_eb = EB2");
+ __CHECK_STR("=13 ", "var_ec = EC2");
+ __CHECK_STR("=1 ", "var_b = 1");
+ __CHECK_STR("=170 ", "struct1[2].struct2[1][2].u.var_u8[2]=170");
+ __CHECK_STR("=0xaaaa ", "union1.var_u16 = 0xaaaa");
+ __CHECK_STR("=171 ", "arr[3]= 171");
+ __CHECK_STR("=172 ", "arr[EA2] =172");
+ __CHECK_STR("=10 ", "enum_arr[EC2]=EA3");
+ __CHECK_STR("=173 ", "matrix[31][7][11]=173");
+ __CHECK_STR("=174 ", "struct1[2].struct2[1][2].u.mat[5][3]=174");
+ __CHECK_STR("=175 ", "struct11[7][5].struct2[0][1].u.mat[3][0]=175");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_from_file_succeeds(void)
+{
+ struct fixture *fix = init_fixture();
+ char input_file[80];
+ const char *vars = "var_s16 = -32768\nvar_u16 = 60652";
+ int fd;
+
+ snprintf(input_file, sizeof(input_file), "/tmp/veristat_input.XXXXXX");
+ fd = mkstemp(input_file);
+ if (!ASSERT_GE(fd, 0, "valid fd"))
+ goto out;
+
+ write(fd, vars, strlen(vars));
+ syncfs(fd);
+ SYS(out, "%s set_global_vars.bpf.o -G \"@%s\" -vl2 > %s",
+ fix->veristat, input_file, fix->tmpfile);
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("=0x8000 ", "var_s16 = -32768");
+ __CHECK_STR("=0xecec ", "var_u16 = 60652");
+
+out:
+ close(fd);
+ remove(input_file);
+ teardown_fixture(fix);
+}
+
+static void test_set_global_vars_out_of_range(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_s32 = 2147483648\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("is out of range [-2147483648; 2147483647]", "out of range");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_unsupported_ptr_array_type(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"ptr_arr[0] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't set ptr_arr[0]. Only ints and enums are supported", "ptr_arr");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_out_of_bounds(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[99] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Array index 99 is out of bounds", "arr[99]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_not_found(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr[EG2] = 0\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ read(fix->fd, fix->output, fix->sz);
+ __CHECK_STR("Can't resolve enum value EG2", "arr[EG2]");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_array_index_for_non_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"var_b[0] = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for var_b", "var_b[0] = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"union1.struct3[0].var_u8_l=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Array index is not expected for struct3", "union1.struct3[0].var_u8_l=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+static void test_no_array_index_for_array(void)
+{
+ struct fixture *fix = init_fixture();
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"arr = 1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't set arr. Only ints and enums are supported", "arr = 1");
+
+ SYS_FAIL(out,
+ "%s set_global_vars.bpf.o -G \"struct1[0].struct2.u.var_u8[2]=1\" -vl2 2> %s",
+ fix->veristat, fix->tmpfile);
+
+ pread(fix->fd, fix->output, fix->sz, 0);
+ __CHECK_STR("Can't resolve field u for non-composite type", "struct1[0].struct2.u.var_u8[2]=1");
+
+out:
+ teardown_fixture(fix);
+}
+
+void test_veristat(void)
+{
+ if (test__start_subtest("set_global_vars_succeeds"))
+ test_set_global_vars_succeeds();
+
+ if (test__start_subtest("set_global_vars_out_of_range"))
+ test_set_global_vars_out_of_range();
+
+ if (test__start_subtest("set_global_vars_from_file_succeeds"))
+ test_set_global_vars_from_file_succeeds();
+
+ if (test__start_subtest("test_unsupported_ptr_array_type"))
+ test_unsupported_ptr_array_type();
+
+ if (test__start_subtest("test_array_out_of_bounds"))
+ test_array_out_of_bounds();
+
+ if (test__start_subtest("test_array_index_not_found"))
+ test_array_index_not_found();
+
+ if (test__start_subtest("test_array_index_for_non_array"))
+ test_array_index_for_non_array();
+
+ if (test__start_subtest("test_no_array_index_for_array"))
+ test_no_array_index_for_array();
+}
+
+#undef __CHECK_STR
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
index 8d75424fe6bc..3e98a1665936 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_xdp_veth.c
@@ -3,17 +3,50 @@
/* Create 3 namespaces with 3 veth peers, and forward packets in-between using
* native XDP
*
- * XDP_TX
- * NS1(veth11) NS2(veth22) NS3(veth33)
- * | | |
- * | | |
- * (veth1, (veth2, (veth3,
- * id:111) id:122) id:133)
- * ^ | ^ | ^ |
- * | | XDP_REDIRECT | | XDP_REDIRECT | |
- * | ------------------ ------------------ |
- * -----------------------------------------
- * XDP_REDIRECT
+ * Network topology:
+ * ---------- ---------- ----------
+ * | NS1 | | NS2 | | NS3 |
+ * | veth11 | | veth22 | | veth33 |
+ * ----|----- -----|---- -----|----
+ * | | |
+ * ----|------------------|----------------|----
+ * | veth1 veth2 veth3 |
+ * | |
+ * | NS0 |
+ * ---------------------------------------------
+ *
+ * Test cases:
+ * - [test_xdp_veth_redirect] : ping veth33 from veth11
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_TX) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * ^ | ^ | ^ |
+ * | | | | | |
+ * | ------------------ ------------------ |
+ * -----------------------------------------
+ *
+ * - [test_xdp_veth_broadcast_redirect]: broadcast from veth11
+ * - IPv4 ping : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS
+ * -> echo request received by all except veth11
+ * - IPv4 ping : BPF_F_BROADCAST
+ * -> echo request received by all veth
+ * - [test_xdp_veth_egress]:
+ * - all src mac should be the magic mac
+ *
+ * veth11 veth22 veth33
+ * (XDP_PASS) (XDP_PASS) (XDP_PASS)
+ * | | |
+ * | | |
+ * veth1 veth2 veth3
+ * (XDP_REDIRECT) (XDP_REDIRECT) (XDP_REDIRECT)
+ * | ^ ^
+ * | | |
+ * ----------------------------------------
+ *
*/
#define _GNU_SOURCE
@@ -22,192 +55,545 @@
#include "network_helpers.h"
#include "xdp_dummy.skel.h"
#include "xdp_redirect_map.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
#include "xdp_tx.skel.h"
+#include <uapi/linux/if_link.h>
#define VETH_PAIRS_COUNT 3
-#define NS_SUFFIX_LEN 6
-#define VETH_NAME_MAX_LEN 16
+#define VETH_NAME_MAX_LEN 32
+#define IP_MAX_LEN 16
#define IP_SRC "10.1.1.11"
#define IP_DST "10.1.1.33"
-#define IP_CMD_MAX_LEN 128
-
-struct skeletons {
- struct xdp_dummy *xdp_dummy;
- struct xdp_tx *xdp_tx;
- struct xdp_redirect_map *xdp_redirect_maps;
-};
+#define IP_NEIGH "10.1.1.253"
+#define PROG_NAME_MAX_LEN 128
+#define NS_NAME_MAX_LEN 32
struct veth_configuration {
char local_veth[VETH_NAME_MAX_LEN]; /* Interface in main namespace */
char remote_veth[VETH_NAME_MAX_LEN]; /* Peer interface in dedicated namespace*/
- const char *namespace; /* Namespace for the remote veth */
- char next_veth[VETH_NAME_MAX_LEN]; /* Local interface to redirect traffic to */
- char *remote_addr; /* IP address of the remote veth */
+ char namespace[NS_NAME_MAX_LEN]; /* Namespace for the remote veth */
+ int next_veth; /* Local interface to redirect traffic to */
+ char remote_addr[IP_MAX_LEN]; /* IP address of the remote veth */
};
-static struct veth_configuration config[VETH_PAIRS_COUNT] = {
- {
- .local_veth = "veth1",
- .remote_veth = "veth11",
- .next_veth = "veth2",
- .remote_addr = IP_SRC,
- .namespace = "ns-veth11"
- },
- {
- .local_veth = "veth2",
- .remote_veth = "veth22",
- .next_veth = "veth3",
- .remote_addr = NULL,
- .namespace = "ns-veth22"
- },
+struct net_configuration {
+ char ns0_name[NS_NAME_MAX_LEN];
+ struct veth_configuration veth_cfg[VETH_PAIRS_COUNT];
+};
+
+static const struct net_configuration default_config = {
+ .ns0_name = "ns0-",
{
- .local_veth = "veth3",
- .remote_veth = "veth33",
- .next_veth = "veth1",
- .remote_addr = IP_DST,
- .namespace = "ns-veth33"
+ {
+ .local_veth = "veth1-",
+ .remote_veth = "veth11",
+ .next_veth = 1,
+ .remote_addr = IP_SRC,
+ .namespace = "ns-veth11-"
+ },
+ {
+ .local_veth = "veth2-",
+ .remote_veth = "veth22",
+ .next_veth = 2,
+ .remote_addr = "",
+ .namespace = "ns-veth22-"
+ },
+ {
+ .local_veth = "veth3-",
+ .remote_veth = "veth33",
+ .next_veth = 0,
+ .remote_addr = IP_DST,
+ .namespace = "ns-veth33-"
+ }
}
};
-static int attach_programs_to_veth_pair(struct skeletons *skeletons, int index)
+struct prog_configuration {
+ char local_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to local_veth */
+ char remote_name[PROG_NAME_MAX_LEN]; /* BPF prog to attach to remote_veth */
+ u32 local_flags; /* XDP flags to use on local_veth */
+ u32 remote_flags; /* XDP flags to use on remote_veth */
+};
+
+static int attach_programs_to_veth_pair(struct bpf_object **objs, size_t nb_obj,
+ struct net_configuration *net_config,
+ struct prog_configuration *prog, int index)
{
struct bpf_program *local_prog, *remote_prog;
- struct bpf_link **local_link, **remote_link;
struct nstoken *nstoken;
- struct bpf_link *link;
- int interface;
-
- switch (index) {
- case 0:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_0;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_0;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
- case 1:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_1;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_1;
- remote_prog = skeletons->xdp_tx->progs.xdp_tx;
- remote_link = &skeletons->xdp_tx->links.xdp_tx;
- break;
- case 2:
- local_prog = skeletons->xdp_redirect_maps->progs.xdp_redirect_map_2;
- local_link = &skeletons->xdp_redirect_maps->links.xdp_redirect_map_2;
- remote_prog = skeletons->xdp_dummy->progs.xdp_dummy_prog;
- remote_link = &skeletons->xdp_dummy->links.xdp_dummy_prog;
- break;
+ int interface, ret, i;
+
+ for (i = 0; i < nb_obj; i++) {
+ local_prog = bpf_object__find_program_by_name(objs[i], prog[index].local_name);
+ if (local_prog)
+ break;
}
- interface = if_nametoindex(config[index].local_veth);
+ if (!ASSERT_OK_PTR(local_prog, "find local program"))
+ return -1;
+
+ for (i = 0; i < nb_obj; i++) {
+ remote_prog = bpf_object__find_program_by_name(objs[i], prog[index].remote_name);
+ if (remote_prog)
+ break;
+ }
+ if (!ASSERT_OK_PTR(remote_prog, "find remote program"))
+ return -1;
+
+ interface = if_nametoindex(net_config->veth_cfg[index].local_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index"))
return -1;
- link = bpf_program__attach_xdp(local_prog, interface);
- if (!ASSERT_OK_PTR(link, "attach xdp program to local veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(local_prog),
+ prog[index].local_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to local veth"))
return -1;
- *local_link = link;
- nstoken = open_netns(config[index].namespace);
+
+ nstoken = open_netns(net_config->veth_cfg[index].namespace);
if (!ASSERT_OK_PTR(nstoken, "switch to remote veth namespace"))
return -1;
- interface = if_nametoindex(config[index].remote_veth);
+
+ interface = if_nametoindex(net_config->veth_cfg[index].remote_veth);
if (!ASSERT_NEQ(interface, 0, "non zero interface index")) {
close_netns(nstoken);
return -1;
}
- link = bpf_program__attach_xdp(remote_prog, interface);
- *remote_link = link;
- close_netns(nstoken);
- if (!ASSERT_OK_PTR(link, "attach xdp program to remote veth"))
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(remote_prog),
+ prog[index].remote_flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp program to remote veth")) {
+ close_netns(nstoken);
return -1;
+ }
+ close_netns(nstoken);
return 0;
}
-static int configure_network(struct skeletons *skeletons)
+static int create_network(struct net_configuration *net_config)
{
- int interface_id;
- int map_fd;
- int err;
- int i = 0;
+ struct nstoken *nstoken = NULL;
+ int i, err;
+
+ memcpy(net_config, &default_config, sizeof(struct net_configuration));
+
+ /* Create unique namespaces */
+ err = append_tid(net_config->ns0_name, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns0 name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->ns0_name);
- /* First create and configure all interfaces */
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- SYS(fail, "ip netns add %s", config[i].namespace);
- SYS(fail, "ip link add %s type veth peer name %s netns %s",
- config[i].local_veth, config[i].remote_veth, config[i].namespace);
- SYS(fail, "ip link set dev %s up", config[i].local_veth);
- if (config[i].remote_addr)
- SYS(fail, "ip -n %s addr add %s/24 dev %s", config[i].namespace,
- config[i].remote_addr, config[i].remote_veth);
- SYS(fail, "ip -n %s link set dev %s up", config[i].namespace,
- config[i].remote_veth);
+ err = append_tid(net_config->veth_cfg[i].namespace, NS_NAME_MAX_LEN);
+ if (!ASSERT_OK(err, "append TID to ns name"))
+ goto fail;
+ SYS(fail, "ip netns add %s", net_config->veth_cfg[i].namespace);
}
- /* Then configure the redirect map and attach programs to interfaces */
- map_fd = bpf_map__fd(skeletons->xdp_redirect_maps->maps.tx_port);
- if (!ASSERT_GE(map_fd, 0, "open redirect map"))
+ /* Create interfaces */
+ nstoken = open_netns(net_config->ns0_name);
+ if (!nstoken)
goto fail;
+
for (i = 0; i < VETH_PAIRS_COUNT; i++) {
- interface_id = if_nametoindex(config[i].next_veth);
- if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
- goto fail;
- err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
- if (!ASSERT_OK(err, "configure interface redirection through map"))
- goto fail;
- if (attach_programs_to_veth_pair(skeletons, i))
- goto fail;
+ SYS(fail, "ip link add %s type veth peer name %s netns %s",
+ net_config->veth_cfg[i].local_veth, net_config->veth_cfg[i].remote_veth,
+ net_config->veth_cfg[i].namespace);
+ SYS(fail, "ip link set dev %s up", net_config->veth_cfg[i].local_veth);
+ if (net_config->veth_cfg[i].remote_addr[0])
+ SYS(fail, "ip -n %s addr add %s/24 dev %s",
+ net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_addr,
+ net_config->veth_cfg[i].remote_veth);
+ SYS(fail, "ip -n %s link set dev %s up", net_config->veth_cfg[i].namespace,
+ net_config->veth_cfg[i].remote_veth);
}
+ close_netns(nstoken);
return 0;
fail:
+ close_netns(nstoken);
return -1;
}
-static void cleanup_network(void)
+static void cleanup_network(struct net_configuration *net_config)
{
int i;
- /* Deleting namespaces is enough to automatically remove veth pairs as well
- */
+ SYS_NOFAIL("ip netns del %s", net_config->ns0_name);
for (i = 0; i < VETH_PAIRS_COUNT; i++)
- SYS_NOFAIL("ip netns del %s", config[i].namespace);
+ SYS_NOFAIL("ip netns del %s", net_config->veth_cfg[i].namespace);
}
-static int check_ping(struct skeletons *skeletons)
+#define VETH_REDIRECT_SKEL_NB 3
+static void xdp_veth_redirect(u32 flags)
{
+ struct prog_configuration ping_config[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_0",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_1",
+ .remote_name = "xdp_tx",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_2",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ struct bpf_object *bpf_objs[VETH_REDIRECT_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ int map_fd;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_tx, "xdp_tx__open_and_load"))
+ goto destroy_xdp_dummy;
+
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_tx;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ /* Then configure the redirect map and attach programs to interfaces */
+ map_fd = bpf_map__fd(xdp_redirect_map->maps.tx_port);
+ if (!ASSERT_OK_FD(map_fd, "open redirect map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_tx->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int next_veth = net_config.veth_cfg[i].next_veth;
+ int interface_id;
+ int err;
+
+ interface_id = if_nametoindex(net_config.veth_cfg[next_veth].local_veth);
+ if (!ASSERT_NEQ(interface_id, 0, "non zero interface index"))
+ goto destroy_xdp_redirect_map;
+ err = bpf_map_update_elem(map_fd, &i, &interface_id, BPF_ANY);
+ if (!ASSERT_OK(err, "configure interface redirection through map"))
+ goto destroy_xdp_redirect_map;
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_REDIRECT_SKEL_NB,
+ &net_config, ping_config, i))
+ goto destroy_xdp_redirect_map;
+ }
+
/* Test: if all interfaces are properly configured, we must be able to ping
* veth33 from veth11
*/
- return SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
- config[0].namespace, IP_DST);
+ ASSERT_OK(SYS_NOFAIL("ip netns exec %s ping -c 1 -W 1 %s > /dev/null",
+ net_config.veth_cfg[0].namespace, IP_DST), "ping");
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_tx:
+ xdp_tx__destroy(xdp_tx);
+destroy_xdp_dummy:
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
}
-void test_xdp_veth_redirect(void)
+#define BROADCAST_REDIRECT_SKEL_NB 2
+static void xdp_veth_broadcast_redirect(u32 attach_flags, u64 redirect_flags)
{
- struct skeletons skeletons = {};
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_0",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_1",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_multi_prog",
+ .remote_name = "xdp_count_2",
+ .local_flags = attach_flags,
+ .remote_flags = attach_flags,
+ }
+ };
+ struct bpf_object *bpf_objs[BROADCAST_REDIRECT_SKEL_NB];
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ struct nstoken *nstoken = NULL;
+ u16 protocol = ETH_P_IP;
+ int group_map;
+ int flags_map;
+ int cnt_map;
+ u64 cnt = 0;
+ int i, err;
- skeletons.xdp_dummy = xdp_dummy__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
return;
- skeletons.xdp_tx = xdp_tx__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
+
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
+ goto destroy_xdp_redirect_map;
+
+ group_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_all);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ flags_map = bpf_map__fd(xdp_redirect_multi_kern->maps.redirect_flags);
+ if (!ASSERT_OK_FD(group_map, "open map_all"))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(flags_map, &protocol, &redirect_flags, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "init IP count"))
+ goto destroy_xdp_redirect_map;
+
+ cnt_map = bpf_map__fd(xdp_redirect_map->maps.rxcnt);
+ if (!ASSERT_OK_FD(cnt_map, "open rxcnt map"))
+ goto destroy_xdp_redirect_map;
+
+ bpf_objs[0] = xdp_redirect_multi_kern->obj;
+ bpf_objs[1] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, BROADCAST_REDIRECT_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(group_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ err = bpf_map_lookup_elem(cnt_map, &i, &cnt);
+ if (!ASSERT_OK(err, "get IP cnt"))
+ goto destroy_xdp_redirect_map;
+
+ if (redirect_flags & BPF_F_EXCLUDE_INGRESS)
+ /* veth11 shouldn't receive the ICMP requests;
+ * others should
+ */
+ ASSERT_EQ(cnt, i ? 4 : 0, "compare IP cnt");
+ else
+ /* All remote veth should receive the ICMP requests */
+ ASSERT_EQ(cnt, 4, "compare IP cnt");
+ }
+
+destroy_xdp_redirect_map:
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
+
+ cleanup_network(&net_config);
+}
+
+#define VETH_EGRESS_SKEL_NB 3
+static void xdp_veth_egress(u32 flags)
+{
+ struct prog_configuration prog_cfg[VETH_PAIRS_COUNT] = {
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "xdp_dummy_prog",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_1",
+ .local_flags = flags,
+ .remote_flags = flags,
+ },
+ {
+ .local_name = "xdp_redirect_map_all_prog",
+ .remote_name = "store_mac_2",
+ .local_flags = flags,
+ .remote_flags = flags,
+ }
+ };
+ const char magic_mac[6] = { 0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF};
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+ struct bpf_object *bpf_objs[VETH_EGRESS_SKEL_NB];
+ struct xdp_redirect_map *xdp_redirect_map;
+ struct bpf_devmap_val devmap_val = {};
+ struct net_configuration net_config;
+ int mac_map, egress_map, res_map;
+ struct nstoken *nstoken = NULL;
+ struct xdp_dummy *xdp_dummy;
+ int err;
+ int i;
+
+ xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_dummy, "xdp_dummy__open_and_load"))
+ return;
+
+ xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_multi_kern, "xdp_redirect_multi_kern__open_and_load"))
goto destroy_xdp_dummy;
- skeletons.xdp_redirect_maps = xdp_redirect_map__open_and_load();
- if (!ASSERT_OK_PTR(skeletons.xdp_redirect_maps, "xdp_redirect_map__open_and_load"))
- goto destroy_xdp_tx;
+ xdp_redirect_map = xdp_redirect_map__open_and_load();
+ if (!ASSERT_OK_PTR(xdp_redirect_map, "xdp_redirect_map__open_and_load"))
+ goto destroy_xdp_redirect_multi_kern;
- if (configure_network(&skeletons))
+ if (!ASSERT_OK(create_network(&net_config), "create network"))
goto destroy_xdp_redirect_map;
- ASSERT_OK(check_ping(&skeletons), "ping");
+ mac_map = bpf_map__fd(xdp_redirect_multi_kern->maps.mac_map);
+ if (!ASSERT_OK_FD(mac_map, "open mac_map"))
+ goto destroy_xdp_redirect_map;
+
+ egress_map = bpf_map__fd(xdp_redirect_multi_kern->maps.map_egress);
+ if (!ASSERT_OK_FD(egress_map, "open map_egress"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.bpf_prog.fd = bpf_program__fd(xdp_redirect_multi_kern->progs.xdp_devmap_prog);
+
+ bpf_objs[0] = xdp_dummy->obj;
+ bpf_objs[1] = xdp_redirect_multi_kern->obj;
+ bpf_objs[2] = xdp_redirect_map->obj;
+
+ nstoken = open_netns(net_config.ns0_name);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < VETH_PAIRS_COUNT; i++) {
+ int ifindex = if_nametoindex(net_config.veth_cfg[i].local_veth);
+
+ SYS(destroy_xdp_redirect_map,
+ "ip -n %s neigh add %s lladdr 00:00:00:00:00:01 dev %s",
+ net_config.veth_cfg[i].namespace, IP_NEIGH, net_config.veth_cfg[i].remote_veth);
+
+ if (attach_programs_to_veth_pair(bpf_objs, VETH_EGRESS_SKEL_NB,
+ &net_config, prog_cfg, i))
+ goto destroy_xdp_redirect_map;
+
+ err = bpf_map_update_elem(mac_map, &ifindex, magic_mac, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+
+ devmap_val.ifindex = ifindex;
+ err = bpf_map_update_elem(egress_map, &ifindex, &devmap_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto destroy_xdp_redirect_map;
+ }
+
+ SYS_NOFAIL("ip netns exec %s ping %s -i 0.1 -c 4 -W1 > /dev/null ",
+ net_config.veth_cfg[0].namespace, IP_NEIGH);
+
+ res_map = bpf_map__fd(xdp_redirect_map->maps.rx_mac);
+ if (!ASSERT_OK_FD(res_map, "open rx_map"))
+ goto destroy_xdp_redirect_map;
+
+ for (i = 0; i < 2; i++) {
+ u32 key = i;
+ u64 res;
+
+ err = bpf_map_lookup_elem(res_map, &key, &res);
+ if (!ASSERT_OK(err, "get MAC res"))
+ goto destroy_xdp_redirect_map;
+
+ ASSERT_STRNEQ((const char *)&res, magic_mac, ETH_ALEN, "compare mac");
+ }
destroy_xdp_redirect_map:
- xdp_redirect_map__destroy(skeletons.xdp_redirect_maps);
-destroy_xdp_tx:
- xdp_tx__destroy(skeletons.xdp_tx);
+ close_netns(nstoken);
+ xdp_redirect_map__destroy(xdp_redirect_map);
+destroy_xdp_redirect_multi_kern:
+ xdp_redirect_multi_kern__destroy(xdp_redirect_multi_kern);
destroy_xdp_dummy:
- xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_dummy__destroy(xdp_dummy);
+
+ cleanup_network(&net_config);
+}
+
+void test_xdp_veth_redirect(void)
+{
+ if (test__start_subtest("0"))
+ xdp_veth_redirect(0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_veth_redirect(XDP_FLAGS_SKB_MODE);
+}
+
+void test_xdp_veth_broadcast_redirect(void)
+{
+ if (test__start_subtest("0/BROADCAST"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST);
+
+ if (test__start_subtest("0/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(0, BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("DRV_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("DRV_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_DRV_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+
+ if (test__start_subtest("SKB_MODE/BROADCAST"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE, BPF_F_BROADCAST);
+
+ if (test__start_subtest("SKB_MODE/(BROADCAST | EXCLUDE_INGRESS)"))
+ xdp_veth_broadcast_redirect(XDP_FLAGS_SKB_MODE,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+void test_xdp_veth_egress(void)
+{
+ if (test__start_subtest("0/egress"))
+ xdp_veth_egress(0);
+
+ if (test__start_subtest("DRV_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_DRV_MODE);
- cleanup_network();
+ if (test__start_subtest("SKB_MODE/egress"))
+ xdp_veth_egress(XDP_FLAGS_SKB_MODE);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.c b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
new file mode 100644
index 000000000000..5af28f359cfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.c
@@ -0,0 +1,2596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/bpf.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/if_link.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#include "network_helpers.h"
+#include "test_xsk.h"
+#include "xsk_xdp_common.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define DEFAULT_BATCH_SIZE 64
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_JUMBO_SIZE 9000
+#define MAX_INTERFACES 2
+#define MAX_TEARDOWN_ITER 10
+#define MAX_TX_BUDGET_DEFAULT 32
+#define PKT_DUMP_NB_TO_PRINT 16
+/* Just to align the data in the packet */
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2)
+#define POLL_TMOUT 1000
+#define THREAD_TMOUT 3
+#define UMEM_HEADROOM_TEST_SIZE 128
+#define XSK_DESC__INVALID_OPTION (0xffff)
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
+
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
+bool opt_verbose;
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int pkts_in_flight;
+
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16 bits and an intra-packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
+ */
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
+{
+ u32 *ptr = (u32 *)dest, i;
+
+ start /= sizeof(*ptr);
+ size /= sizeof(*ptr);
+ for (i = 0; i < size; i++)
+ ptr[i] = htonl(pkt_nb << 16 | (i + start));
+}
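The word layout described in the comment above is easy to sanity-check in isolation. A minimal stand-alone sketch of the encode/decode round trip, using only libc and independent of the test harness:

#include <arpa/inet.h>
#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t pkt_nb = 2, word = 4;			/* 3rd packet, 5th data word */
	uint32_t wire = htonl(pkt_nb << 16 | word);	/* as produced by write_payload() */

	assert((ntohl(wire) >> 16) == pkt_nb);		/* decoded as in pkt_print_data() */
	assert((ntohl(wire) & 0xffff) == word);
	return 0;
}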
+
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
+{
+ memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
+}
+
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+ return !!ifobj->umem->umem;
+}
+
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+ return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+}
+
+static u64 umem_size(struct xsk_umem_info *umem)
+{
+ return umem->num_frames * umem->frame_size;
+}
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+ u64 size)
+{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = umem->frame_size,
+ .frame_headroom = umem->frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int ret;
+
+ if (umem->fill_size)
+ cfg.fill_size = umem->fill_size;
+
+ if (umem->comp_size)
+ cfg.comp_size = umem->comp_size;
+
+ if (umem->unaligned_mode)
+ cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
+ if (ret)
+ return ret;
+
+ umem->buffer = buffer;
+ if (ifobj->shared_umem && ifobj->rx_on) {
+ umem->base_addr = umem_size(umem);
+ umem->next_buffer = umem_size(umem);
+ }
+
+ return 0;
+}
+
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+ u64 addr;
+
+ addr = umem->next_buffer;
+ umem->next_buffer += umem->frame_size;
+ if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+ umem->next_buffer = umem->base_addr;
+
+ return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+ umem->next_buffer = 0;
+}
+
+static int enable_busy_poll(struct xsk_socket_info *xsk)
+{
+ int sock_opt;
+
+ sock_opt = 1;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ sock_opt = 20;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ sock_opt = xsk->batch_size;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ return -errno;
+
+ return 0;
+}
+
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared)
+{
+ struct xsk_socket_config cfg = {};
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
+
+ xsk->umem = umem;
+ cfg.rx_size = xsk->rxqsize;
+ cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg.bind_flags = ifobject->bind_flags;
+ if (shared)
+ cfg.bind_flags |= XDP_SHARED_UMEM;
+ if (ifobject->mtu > MAX_ETH_PKT_SIZE)
+ cfg.bind_flags |= XDP_USE_SG;
+ if (umem->comp_size)
+ cfg.tx_size = umem->comp_size;
+ if (umem->fill_size)
+ cfg.rx_size = umem->fill_size;
+
+ txr = ifobject->tx_on ? &xsk->tx : NULL;
+ rxr = ifobject->rx_on ? &xsk->rx : NULL;
+ return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
+}
+
+#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
+static unsigned int get_max_skb_frags(void)
+{
+ unsigned int max_skb_frags = 0;
+ FILE *file;
+
+ file = fopen(MAX_SKB_FRAGS_PATH, "r");
+ if (!file) {
+ ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
+ return 0;
+ }
+
+ if (fscanf(file, "%u", &max_skb_frags) != 1)
+ ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
+
+ fclose(file);
+ return max_skb_frags;
+}
+
+static int set_ring_size(struct ifobject *ifobj)
+{
+ int ret;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+ if (!ret)
+ break;
+
+ /* Retry if it fails */
+ if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
+ return -errno;
+
+ usleep(USLEEP_MAX);
+ }
+
+ return ret;
+}
+
+int hw_ring_size_reset(struct ifobject *ifobj)
+{
+ ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+ ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+ return set_ring_size(ifobj);
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx)
+{
+ u32 i, j;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->xsk = &ifobj->xsk_arr[0];
+ ifobj->use_poll = false;
+ ifobj->use_fill_ring = true;
+ ifobj->release_rx = true;
+ ifobj->validation_func = NULL;
+ ifobj->use_metadata = false;
+
+ if (i == 0) {
+ ifobj->rx_on = false;
+ ifobj->tx_on = true;
+ } else {
+ ifobj->rx_on = true;
+ ifobj->tx_on = false;
+ }
+
+ memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+ ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+ ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+ for (j = 0; j < MAX_SOCKETS; j++) {
+ memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+ ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
+ if (i == 0)
+ ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+ else
+ ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+ memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+ memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+ ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+ ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
+ }
+ }
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ test->ifobj_tx = ifobj_tx;
+ test->ifobj_rx = ifobj_rx;
+ test->current_step = 0;
+ test->total_steps = 1;
+ test->nb_sockets = 1;
+ test->fail = false;
+ test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
+ test->mtu = MAX_ETH_PKT_SIZE;
+ test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+ test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run)
+{
+ struct pkt_stream *tx_pkt_stream;
+ struct pkt_stream *rx_pkt_stream;
+ u32 i;
+
+ tx_pkt_stream = test->tx_pkt_stream_default;
+ rx_pkt_stream = test->rx_pkt_stream_default;
+ memset(test, 0, sizeof(*test));
+ test->tx_pkt_stream_default = tx_pkt_stream;
+ test->rx_pkt_stream_default = rx_pkt_stream;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+ if (mode == TEST_MODE_ZC)
+ ifobj->bind_flags |= XDP_ZEROCOPY;
+ else
+ ifobj->bind_flags |= XDP_COPY;
+ }
+
+ memcpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+ test->test_func = test_to_run->test_func;
+ test->mode = mode;
+ __test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+ __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+ struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+ struct bpf_map *xskmap_tx)
+{
+ test->xdp_prog_rx = xdp_prog_rx;
+ test->xdp_prog_tx = xdp_prog_tx;
+ test->xskmap_rx = xskmap_rx;
+ test->xskmap_tx = xskmap_tx;
+}
+
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+ int err;
+
+ if (test->ifobj_rx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_rx->mtu = mtu;
+ }
+ if (test->ifobj_tx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_tx->mtu = mtu;
+ }
+
+ return 0;
+}
+
+void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream) {
+ pkt_stream->current_pkt_nb = 0;
+ pkt_stream->nb_rx_pkts = 0;
+ }
+}
+
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+ (*pkts_sent)++;
+ if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+ pkt_stream->current_pkt_nb++;
+ }
+ return NULL;
+}
+
+void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+ free(pkt_stream->pkts);
+ free(pkt_stream);
+}
+
+void pkt_stream_restore_default(struct test_spec *test)
+{
+ struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+
+ if (tx_pkt_stream != test->tx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+ test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
+ }
+
+ if (rx_pkt_stream != test->rx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+ test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
+ }
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = calloc(1, sizeof(*pkt_stream));
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts) {
+ free(pkt_stream);
+ return NULL;
+ }
+
+ pkt_stream->nb_pkts = nb_pkts;
+ return pkt_stream;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
+{
+ u32 nb_frags = 1, next_frag;
+
+ if (!pkt)
+ return 1;
+
+ if (!pkt_stream->verbatim) {
+ if (!pkt->valid || !pkt->len)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+ }
+
+ /* Search for the end of the packet in verbatim mode */
+ if (!pkt_continues(pkt->options))
+ return nb_frags;
+
+ next_frag = pkt_stream->current_pkt_nb;
+ pkt++;
+ while (next_frag++ < pkt_stream->nb_pkts) {
+ nb_frags++;
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ break;
+ pkt++;
+ }
+ return nb_frags;
+}
+
+static bool set_pkt_valid(int offset, u32 len)
+{
+ return len <= MAX_ETH_JUMBO_SIZE;
+}
+
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ pkt->offset = offset;
+ pkt->len = len;
+ pkt->valid = set_pkt_valid(offset, len);
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ bool prev_pkt_valid = pkt->valid;
+
+ pkt_set(pkt_stream, pkt, offset, len);
+ pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
+}
+
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+ return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = __pkt_stream_alloc(nb_pkts);
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->nb_pkts = nb_pkts;
+ pkt_stream->max_pkt_len = pkt_len;
+ for (i = 0; i < nb_pkts; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt->pkt_nb = nb_start + i * nb_off;
+ }
+
+ return pkt_stream;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+ return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static int pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
+{
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+
+ if (!ifobj->xsk->pkt_stream)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ int ret;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ if (ret)
+ return ret;
+
+ return pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
+}
+
+static int __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+ int offset)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+ if (!pkt_stream)
+ return -ENOMEM;
+
+ for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+ pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
+
+ ifobj->xsk->pkt_stream = pkt_stream;
+
+ return 0;
+}
+
+static int pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+ int ret = __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+
+ if (ret)
+ return ret;
+
+ return __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
+}
+
+static int pkt_stream_receive_half(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ u32 i;
+
+ if (test->ifobj_rx->xsk->pkt_stream != test->rx_pkt_stream_default)
+ /* Packet stream has already been replaced so we have to release this one.
+ * The newly created one will be freed by the restore_default() at the
+ * end of the test
+ */
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+ pkt_stream->pkts[0].len);
+ if (!test->ifobj_rx->xsk->pkt_stream)
+ return -ENOMEM;
+
+ pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+ for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+ pkt_stream->pkts[i].valid = false;
+
+ pkt_stream->nb_valid_entries /= 2;
+
+ return 0;
+}
+
+static int pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+ pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ }
+
+ return 0;
+}
+
+static void release_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *later_free_tx = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *later_free_rx = test->ifobj_rx->xsk->pkt_stream;
+ int i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ /* later_free_{rx/tx} will be freed by restore_default() */
+ if (test->ifobj_tx->xsk_arr[i].pkt_stream != later_free_tx)
+ pkt_stream_delete(test->ifobj_tx->xsk_arr[i].pkt_stream);
+ if (test->ifobj_rx->xsk_arr[i].pkt_stream != later_free_rx)
+ pkt_stream_delete(test->ifobj_rx->xsk_arr[i].pkt_stream);
+ }
+}
+
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
+{
+ if (!pkt->valid)
+ return pkt->offset;
+ return pkt->offset + umem_alloc_buffer(umem);
+}
+
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+ pkt_stream->current_pkt_nb--;
+}
+
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+ u32 pkt_nb, u32 bytes_written)
+{
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ if (len < MIN_PKT_SIZE)
+ return;
+
+ if (!bytes_written) {
+ gen_eth_hdr(xsk, data);
+
+ len -= PKT_HDR_SIZE;
+ data += PKT_HDR_SIZE;
+ } else {
+ bytes_written -= PKT_HDR_SIZE;
+ }
+
+ write_payload(data, pkt_nb, bytes_written, len);
+}
+
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+ u32 nb_frames, bool verbatim)
+{
+ u32 i, len = 0, pkt_nb = 0, payload = 0;
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_alloc(nb_frames);
+ if (!pkt_stream)
+ return NULL;
+
+ for (i = 0; i < nb_frames; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+ struct pkt *frame = &frames[i];
+
+ pkt->offset = frame->offset;
+ if (verbatim) {
+ *pkt = *frame;
+ pkt->pkt_nb = payload;
+ if (!frame->valid || !pkt_continues(frame->options))
+ payload++;
+ } else {
+ if (frame->valid)
+ len += frame->len;
+ if (frame->valid && pkt_continues(frame->options))
+ continue;
+
+ pkt->pkt_nb = pkt_nb;
+ pkt->len = len;
+ pkt->valid = frame->valid;
+ pkt->options = 0;
+
+ len = 0;
+ }
+
+ print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+ pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
+ if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
+ pkt_stream->max_pkt_len = pkt->len;
+
+ if (pkt->valid)
+ pkt_stream->nb_valid_entries++;
+
+ pkt_nb++;
+ }
+
+ pkt_stream->nb_pkts = pkt_nb;
+ pkt_stream->verbatim = verbatim;
+ return pkt_stream;
+}
+
+static int pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+ if (!pkt_stream)
+ return -ENOMEM;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+
+ return 0;
+}
+
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ u32 seqnum, pkt_nb;
+
+ seqnum = ntohl(*data) & 0xffff;
+ pkt_nb = ntohl(*data) >> 16;
+ ksft_print_msg("%u:%u ", pkt_nb, seqnum);
+ data++;
+ }
+}
+
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+ struct ethhdr *ethhdr = pkt;
+ u32 i, *data;
+
+ if (eth_header) {
+ /* Extract L2 frame */
+ ksft_print_msg("DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_dest[i]);
+
+ ksft_print_msg("\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_source[i]);
+
+ data = pkt + PKT_HDR_SIZE;
+ } else {
+ data = pkt;
+ }
+
+ /* Extract L5 frame */
+ ksft_print_msg("\nDEBUG>> L5: seqnum: ");
+ pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+ ksft_print_msg("....");
+ if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+ ksft_print_msg("\n.... ");
+ pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+ PKT_DUMP_NB_TO_PRINT);
+ }
+ ksft_print_msg("\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
+{
+ u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+ u32 offset = addr % umem->frame_size, expected_offset;
+ int pkt_offset = pkt->valid ? pkt->offset : 0;
+
+ if (!umem->unaligned_mode)
+ pkt_offset = 0;
+
+ expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+ if (offset == expected_offset)
+ return true;
+
+ ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
+ return false;
+}
+
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+ void *data = xsk_umem__get_data(buffer, addr);
+ struct xdp_info *meta = data - sizeof(struct xdp_info);
+
+ if (meta->count != pkt->pkt_nb) {
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
+ return false;
+ }
+
+ return true;
+}
+
+static int is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx, bool *supported)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ return -EINVAL;
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ return ret;
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ *supported = adjust_value != -EOPNOTSUPP;
+
+ return 0;
+}
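The convention read back here assumes the XDP side stores the bpf_xdp_adjust_tail() return value in a global variable that lands in the program's .bss map. A rough sketch of what such a counterpart could look like; this is an assumption for illustration, and the real program behind xsk_xdp_progs.skel.h is more involved:

// SPDX-License-Identifier: GPL-2.0
/* Illustrative counterpart: record the bpf_xdp_adjust_tail() result in a
 * global so user space can detect -EOPNOTSUPP through the .bss map.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

int adjust_value;	/* ends up in the skeleton's .bss map */

SEC("xdp")
int xsk_adjust_tail_sketch(struct xdp_md *ctx)
{
	adjust_value = bpf_xdp_adjust_tail(ctx, -4);	/* e.g. trim 4 bytes */

	return XDP_PASS;
}

char _license[] SEC("license") = "GPL";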
+
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+ u32 bytes_processed)
+{
+ u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ addr -= umem->base_addr;
+
+ if (addr >= umem->num_frames * umem->frame_size ||
+ addr + len > umem->num_frames * umem->frame_size) {
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+ if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+
+ pkt_data = data;
+ if (!bytes_processed) {
+ pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
+ len -= PKT_HDR_SIZE;
+ } else {
+ bytes_processed -= PKT_HDR_SIZE;
+ }
+
+ expected_seqnum = bytes_processed / sizeof(*pkt_data);
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ pkt_nb = ntohl(*pkt_data) >> 16;
+
+ if (expected_pkt_nb != pkt_nb) {
+ ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+ __func__, expected_pkt_nb, pkt_nb);
+ goto error;
+ }
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ pkt_data += words_to_end;
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ expected_seqnum += words_to_end;
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ return true;
+
+error:
+ pkt_dump(data, len, !bytes_processed);
+ return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+ if (pkt->len != len) {
+ ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+ __func__, pkt->len, len);
+ pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+ return false;
+ }
+
+ return true;
+}
+
+static u32 load_value(u32 *counter)
+{
+ return __atomic_load_n(counter, __ATOMIC_ACQUIRE);
+}
+
+static bool kick_tx_with_check(struct xsk_socket_info *xsk, int *ret)
+{
+ u32 max_budget = MAX_TX_BUDGET_DEFAULT;
+ u32 cons, ready_to_send;
+ int delta;
+
+ cons = load_value(xsk->tx.consumer);
+ ready_to_send = load_value(xsk->tx.producer) - cons;
+ *ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+
+ delta = load_value(xsk->tx.consumer) - cons;
+ /* By default, xsk should consume exact @max_budget descs at one
+ * send in this case where hitting the max budget limit in while
+ * loop is triggered in __xsk_generic_xmit(). Please make sure that
+ * the number of descs to be sent is larger than @max_budget, or
+ * else the tx.consumer will be updated in xskq_cons_peek_desc()
+ * in time which hides the issue we try to verify.
+ */
+ if (ready_to_send > max_budget && delta != max_budget)
+ return false;
+
+ return true;
+}
+
+int kick_tx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ if (xsk->check_consumer) {
+ if (!kick_tx_with_check(xsk, &ret))
+ return TEST_FAILURE;
+ } else {
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ }
+ if (ret >= 0)
+ return TEST_PASS;
+ if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+ usleep(100);
+ return TEST_PASS;
+ }
+ return TEST_FAILURE;
+}
+
+int kick_rx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+ unsigned int rcvd;
+ u32 idx;
+ int ret;
+
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+ if (rcvd) {
+ if (rcvd > xsk->outstanding_tx) {
+ u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+ ksft_print_msg("[%s] Too many packets completed\n", __func__);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
+ return TEST_FAILURE;
+ }
+
+ xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+ xsk->outstanding_tx -= rcvd;
+ }
+
+ return TEST_PASS;
+}
+
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
+{
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct ifobject *ifobj = test->ifobj_rx;
+ struct xsk_umem_info *umem = xsk->umem;
+ struct pollfd fds = { };
+ struct pkt *pkt;
+ u64 first_addr = 0;
+ int ret;
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLIN;
+
+ ret = kick_rx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (ifobj->use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_CONTINUE;
+ }
+
+ if (!(fds.revents & POLLIN))
+ return TEST_CONTINUE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
+ if (!rcvd)
+ return TEST_CONTINUE;
+
+ if (ifobj->use_fill_ring) {
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+ }
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ }
+ }
+
+ while (frags_processed < rcvd) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
+
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+
+ if (!nb_frags) {
+ pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+ if (!pkt) {
+ ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
+ __func__, addr, desc->len);
+ return TEST_FAILURE;
+ }
+ }
+
+ print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+ addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
+
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+ !is_metadata_correct(pkt, umem->buffer, addr)))
+ return TEST_FAILURE;
+
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
+ if (ifobj->use_fill_ring)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+ if (pkt_continues(desc->options))
+ continue;
+
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
+
+ pkt_stream->nb_rx_pkts++;
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
+ }
+ frags_processed -= nb_frags;
+ }
+
+ if (ifobj->use_fill_ring)
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
+ if (ifobj->release_rx)
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight -= pkts_sent;
+ pthread_mutex_unlock(&pacing_mutex);
+ pkts_sent = 0;
+
+ return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+ unsigned long *bitmap)
+{
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+ if (!pkt_stream) {
+ __set_bit(sock_num, bitmap);
+ return false;
+ }
+
+ if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+ __set_bit(sock_num, bitmap);
+ if (bitmap_full(bitmap, test->nb_sockets))
+ return true;
+ }
+
+ return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ struct xsk_socket_info *xsk;
+ u32 sock_num = 0;
+ int res, ret;
+
+ bitmap_zero(bitmap, test->nb_sockets);
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (1) {
+ xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+ if ((all_packets_received(test, xsk, sock_num, bitmap)))
+ break;
+
+ res = __receive_pkts(test, xsk);
+ if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ return res;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+ sock_num = (sock_num + 1) % test->nb_sockets;
+ }
+
+ return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+{
+ u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct xsk_umem_info *umem = ifobject->umem;
+ bool use_poll = ifobject->use_poll;
+ struct pollfd fds = { };
+ int ret;
+
+ buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
+ /* pkts_in_flight might be negative if many invalid packets are sent */
+ if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+ buffer_len)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ return TEST_CONTINUE;
+ }
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLOUT;
+
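+	/* Reserve a full batch of Tx descriptors. If the ring is full, poll
+	 * for POLLOUT (when enabled) and reap completions until slots free up.
+	 */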
+ while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (timeout) {
+ if (ret < 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ if (ret == 0)
+ return TEST_PASS;
+ break;
+ }
+ if (ret <= 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
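+	/* Fill the reserved slots. A packet may span several frags; if the
+	 * next packet does not fit in the remainder of the batch, cancel the
+	 * unused slots and stop early.
+	 */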
+ for (i = 0; i < xsk->batch_size; i++) {
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ u32 nb_frags_left, nb_frags, bytes_written = 0;
+
+ if (!pkt)
+ break;
+
+ nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+ if (nb_frags > xsk->batch_size - i) {
+ pkt_stream_cancel(pkt_stream);
+ xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
+ break;
+ }
+ nb_frags_left = nb_frags;
+
+ while (nb_frags_left--) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+ if (pkt_stream->verbatim) {
+ tx_desc->len = pkt->len;
+ tx_desc->options = pkt->options;
+ } else if (nb_frags_left) {
+ tx_desc->len = umem->frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = pkt->len - bytes_written;
+ tx_desc->options = 0;
+ }
+ if (pkt->valid)
+ pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ bytes_written);
+ bytes_written += tx_desc->len;
+
+ print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+ tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
+ if (nb_frags_left) {
+ i++;
+ if (pkt_stream->verbatim)
+ pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ }
+ }
+
+ if (pkt && pkt->valid) {
+ valid_pkts++;
+ valid_frags += nb_frags;
+ }
+ }
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight += valid_pkts;
+ pthread_mutex_unlock(&pacing_mutex);
+
+ xsk_ring_prod__submit(&xsk->tx, i);
+ xsk->outstanding_tx += valid_frags;
+
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret <= 0) {
+ if (ret == 0 && timeout)
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
+
+ if (!timeout) {
+ if (complete_pkts(xsk, i))
+ return TEST_FAILURE;
+
+ usleep(10);
+ return TEST_PASS;
+ }
+
+ return TEST_CONTINUE;
+}
+
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ int ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (xsk->outstanding_tx) {
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ return TEST_FAILURE;
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+
+ complete_pkts(xsk, xsk->batch_size);
+ }
+
+ return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+ return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+ bool timeout = !is_umem_valid(test->ifobj_rx);
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ u32 i, ret;
+
+ bitmap_zero(bitmap, test->nb_sockets);
+
+ while (!(all_packets_sent(test, bitmap))) {
+ for (i = 0; i < test->nb_sockets; i++) {
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+ if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+ __set_bit(i, bitmap);
+ continue;
+ }
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
+
+ if ((ret || test->fail) && !timeout)
+ return TEST_FAILURE;
+
+ if (ret == TEST_PASS && timeout)
+ return ret;
+
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
+ }
+
+ return TEST_PASS;
+}
+
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+ int fd = xsk_socket__fd(xsk), err;
+ socklen_t optlen, expected_len;
+
+ optlen = sizeof(*stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+ if (err) {
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, errno, strerror(errno));
+ return TEST_FAILURE;
+ }
+
+ expected_len = sizeof(struct xdp_statistics);
+ if (optlen != expected_len) {
+ ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+ __func__, expected_len, optlen);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ /* The receiver calls getsockopt after receiving the last (valid)
+ * packet which is not the final packet sent in this test (valid and
+ * invalid packets are sent in alternating fashion with the final
+ * packet being invalid). Since the last packet may or may not have
+ * been dropped already, both outcomes must be allowed.
+ */
+ if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
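+	/* Give the kernel a moment to process the traffic and update the
+	 * statistics before reading them.
+	 */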
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_ring_full)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_fill_ring_empty_descs)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+		ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+			       __func__, errno, strerror(errno));
+ return TEST_FAILURE;
+ }
+
+ if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
+			       ifobject->xsk->pkt_stream->nb_pkts / 2);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int xsk_configure(struct test_spec *test, struct ifobject *ifobject,
+ struct xsk_umem_info *umem, bool tx)
+{
+ int i, ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+ ifobject, shared);
+ if (!ret)
+ break;
+
+ /* Retry if it fails as xsk_socket__create() is asynchronous */
+ if (ctr >= SOCK_RECONF_CTR)
+ return ret;
+ usleep(USLEEP_MAX);
+ }
+ if (ifobject->busy_poll) {
+ ret = enable_busy_poll(&ifobject->xsk_arr[i]);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+ int ret = xsk_configure(test, ifobject, test->ifobj_rx->umem, true);
+
+ if (ret)
+ return ret;
+ ifobject->xsk = &ifobject->xsk_arr[0];
+ ifobject->xskmap = test->ifobj_rx->xskmap;
+ memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+ ifobject->umem->base_addr = 0;
+
+ return 0;
+}
+
+static int xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+ bool fill_up)
+{
+ u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+ u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
+ int ret;
+
+ if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ buffers_to_fill = umem->num_frames;
+ else
+ buffers_to_fill = umem->fill_size;
+
+ ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+ if (ret != buffers_to_fill)
+ return -ENOSPC;
+
+ while (filled < buffers_to_fill) {
+ struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+ u64 addr;
+ u32 i;
+
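+		/* Hand out one buffer per frag of the packet. With no packet
+		 * left (and fill_up set), fall back to sequential frame
+		 * addresses; otherwise honour the packet's offset.
+		 */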
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
+ if (!pkt) {
+ if (!fill_up)
+ break;
+ addr = filled * umem->frame_size + umem->base_addr;
+ } else if (pkt->offset >= 0) {
+ addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+ } else {
+ addr = pkt->offset + umem_alloc_buffer(umem);
+ }
+
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+ if (++filled >= buffers_to_fill)
+ break;
+ }
+ }
+ xsk_ring_prod__submit(&umem->fq, filled);
+ xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+ pkt_stream_reset(pkt_stream);
+ umem_reset_alloc(umem);
+
+ return 0;
+}
+
+static int thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int mmap_flags;
+ u64 umem_sz;
+ void *bufs;
+ int ret;
+ u32 i;
+
+ umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+ mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+
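+	/* Unaligned mode is backed by huge pages; tests that use it are
+	 * skipped when none are available (see hugepages_present()).
+	 */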
+ if (ifobject->umem->unaligned_mode)
+ mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+
+ if (ifobject->shared_umem)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ return -errno;
+
+ ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
+ if (ret)
+ return ret;
+
+ ret = xsk_configure(test, ifobject, ifobject->umem, false);
+ if (ret)
+ return ret;
+
+ ifobject->xsk = &ifobject->xsk_arr[0];
+
+ if (!ifobject->rx_on)
+ return 0;
+
+ ret = xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream,
+ ifobject->use_fill_ring);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ ifobject->xsk = &ifobject->xsk_arr[i];
+ ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+void *worker_testapp_validate_tx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_tx;
+ int err;
+
+ if (test->current_step == 1) {
+ if (!ifobject->shared_umem) {
+ if (thread_common_ops(test, ifobject)) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+ } else {
+ if (thread_common_ops_tx(test, ifobject)) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+ }
+ }
+
+ err = send_pkts(test, ifobject);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+ if (err)
+ test->fail = true;
+
+ pthread_exit(NULL);
+}
+
+void *worker_testapp_validate_rx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_rx;
+ int err;
+
+ if (test->current_step == 1) {
+ err = thread_common_ops(test, ifobject);
+ } else {
+ xsk_clear_xskmap(ifobject->xskmap);
+ err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
+ if (err)
+ ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+ strerror(-err));
+ }
+
+ pthread_barrier_wait(&barr);
+
+	/* Leave only now in case of error so that the other side is not left stuck in the barrier */
+ if (err) {
+ test->fail = true;
+ pthread_exit(NULL);
+ }
+
+ err = receive_pkts(test);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+
+ if (err) {
+ if (!test->adjust_tail) {
+ test->fail = true;
+ } else {
+ bool supported;
+
+ if (is_adjust_tail_supported(ifobject->xdp_progs, &supported))
+ test->fail = true;
+ else if (!supported)
+ test->adjust_tail_support = false;
+ else
+ test->fail = true;
+ }
+ }
+
+ pthread_exit(NULL);
+}
+
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+ u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+ if (ifobj->shared_umem)
+ umem_sz *= 2;
+
+ umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ xsk_umem__delete(ifobj->umem->umem);
+ munmap(ifobj->umem->buffer, umem_sz);
+}
+
+static void handler(int signum)
+{
+ pthread_exit(NULL);
+}
+
+static bool xdp_prog_changed_rx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_rx;
+
+ return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_tx;
+
+ return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
+static int xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+ struct bpf_map *xskmap, enum test_mode mode)
+{
+ int err;
+
+ xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+ err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+ if (err) {
+ ksft_print_msg("Error attaching XDP program\n");
+ return err;
+ }
+
+ if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+ if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+ ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+ return -EINVAL;
+ }
+
+ ifobj->xdp_prog = xdp_prog;
+ ifobj->xskmap = xskmap;
+ ifobj->mode = mode;
+
+ return 0;
+}
+
+static int xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+ struct ifobject *ifobj_tx)
+{
+ int err = 0;
+
+ if (xdp_prog_changed_rx(test)) {
+ err = xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+ if (err)
+ return err;
+ }
+
+ if (!ifobj_tx || ifobj_tx->shared_umem)
+ return 0;
+
+ if (xdp_prog_changed_tx(test))
+ err = xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+
+ return err;
+}
+
+static void clean_sockets(struct test_spec *test, struct ifobject *ifobj)
+{
+ u32 i;
+
+ if (!ifobj || !test)
+ return;
+
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj->xsk_arr[i].xsk);
+}
+
+static void clean_umem(struct test_spec *test, struct ifobject *ifobj1, struct ifobject *ifobj2)
+{
+ if (!ifobj1)
+ return;
+
+ testapp_clean_xsk_umem(ifobj1);
+ if (ifobj2 && !ifobj2->shared_umem)
+ testapp_clean_xsk_umem(ifobj2);
+}
+
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+ struct ifobject *ifobj2)
+{
+ pthread_t t0, t1;
+ int err;
+
+ if (test->mtu > MAX_ETH_PKT_SIZE) {
+ if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+ (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+ ksft_print_msg("Multi buffer for zero-copy not supported.\n");
+ return TEST_SKIP;
+ }
+ if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+ (ifobj2 && !ifobj2->multi_buff_supp))) {
+ ksft_print_msg("Multi buffer not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+ err = test_spec_set_mtu(test, test->mtu);
+ if (err) {
+ ksft_print_msg("Error, could not set mtu.\n");
+ return TEST_FAILURE;
+ }
+
+ if (ifobj2) {
+ if (pthread_barrier_init(&barr, NULL, 2))
+ return TEST_FAILURE;
+ pkt_stream_reset(ifobj2->xsk->pkt_stream);
+ }
+
+ test->current_step++;
+ pkt_stream_reset(ifobj1->xsk->pkt_stream);
+ pkts_in_flight = 0;
+
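+	/* SIGUSR1 makes a stuck worker exit (see handler()); it is used to
+	 * stop the Rx thread in single-threaded tests that have no Tx peer.
+	 */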
+ signal(SIGUSR1, handler);
+	/* Spawn RX thread */
+ pthread_create(&t0, NULL, ifobj1->func_ptr, test);
+
+ if (ifobj2) {
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr)) {
+ pthread_kill(t0, SIGUSR1);
+ clean_sockets(test, ifobj1);
+ clean_umem(test, ifobj1, NULL);
+ return TEST_FAILURE;
+ }
+
+		/* Spawn TX thread */
+ pthread_create(&t1, NULL, ifobj2->func_ptr, test);
+
+ pthread_join(t1, NULL);
+ }
+
+ if (!ifobj2)
+ pthread_kill(t0, SIGUSR1);
+ else
+ pthread_join(t0, NULL);
+
+ if (test->total_steps == test->current_step || test->fail) {
+ clean_sockets(test, ifobj1);
+ clean_sockets(test, ifobj2);
+ clean_umem(test, ifobj1, ifobj2);
+ }
+
+ if (test->fail)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+ struct ifobject *ifobj_rx = test->ifobj_rx;
+ struct ifobject *ifobj_tx = test->ifobj_tx;
+
+ if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+ (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+ ksft_print_msg("No huge pages present.\n");
+ return TEST_SKIP;
+ }
+
+ if (test->set_ring) {
+ if (ifobj_tx->hw_ring_size_supp) {
+ if (set_ring_size(ifobj_tx)) {
+ ksft_print_msg("Failed to change HW ring size.\n");
+ return TEST_FAILURE;
+ }
+ } else {
+ ksft_print_msg("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+
+ if (xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx))
+ return TEST_FAILURE;
+ return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
+}
+
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+ return __testapp_validate_traffic(test, ifobj, NULL);
+}
+
+int testapp_teardown(struct test_spec *test)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+ test_spec_reset(test);
+ }
+
+ return TEST_PASS;
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+ thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+ struct ifobject *tmp_ifobj = (*ifobj1);
+
+ (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+ (*ifobj2)->func_ptr = tmp_func_ptr;
+
+ *ifobj1 = *ifobj2;
+ *ifobj2 = tmp_ifobj;
+}
+
+int testapp_bidirectional(struct test_spec *test)
+{
+ int res;
+
+ test->ifobj_tx->rx_on = true;
+ test->ifobj_rx->tx_on = true;
+ test->total_steps = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ print_verbose("Switching Tx/Rx direction\n");
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ return res;
+}
+
+static int swap_xsk_resources(struct test_spec *test)
+{
+ int ret;
+
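+	/* Hand the default packet streams over to the second pair of sockets
+	 * and point the xskmap at them, so that the next step runs on sockets
+	 * that were untouched in step one.
+	 */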
+ test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+ test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+ test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+ test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
+
+ ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
+ if (ret)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+int testapp_xdp_prog_cleanup(struct test_spec *test)
+{
+ test->total_steps = 2;
+ test->nb_sockets = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ if (swap_xsk_resources(test)) {
+ clean_sockets(test, test->ifobj_rx);
+ clean_sockets(test, test->ifobj_tx);
+ clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_headroom(struct test_spec *test)
+{
+ test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_dropped(struct test_spec *test)
+{
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run RX_DROPPED test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ if (pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0))
+ return TEST_FAILURE;
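+	/* Shrink the usable buffer area via headroom so that the larger
+	 * packets above no longer fit and are dropped on Rx.
+	 */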
+ test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+ XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+ if (pkt_stream_receive_half(test))
+ return TEST_FAILURE;
+ test->ifobj_rx->validation_func = validate_rx_dropped;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+ if (pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0))
+ return TEST_FAILURE;
+ test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_rx_full(struct test_spec *test)
+{
+ if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+ test->ifobj_rx->release_rx = false;
+ test->ifobj_rx->validation_func = validate_rx_full;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_stats_fill_empty(struct test_spec *test)
+{
+ if (pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->use_fill_ring = false;
+ test->ifobj_rx->validation_func = validate_fill_empty;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* Let half of the packets straddle a 4K buffer boundary */
+ if (pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2))
+ return TEST_FAILURE;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_unaligned_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_single_pkt(struct test_spec *test)
+{
+ struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
+
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE))
+ return TEST_FAILURE;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc_mb(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Valid packet for synch to start with */
+ {0, MIN_PKT_SIZE, 0, true, 0},
+ /* Zero frame len is not legal */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, 0, 0, false, 0},
+ /* Invalid address in the second frame */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid len in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid options in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+ /* Transmit 2 frags, receive 3 */
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+ /* Middle frame crosses chunk boundary with small length */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true, 0}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a chunk boundary allowed */
+ pkts[12].valid = true;
+ pkts[13].valid = true;
+ }
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_invalid_desc(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Zero packet address allowed */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Allowed packet */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Straddling the start of umem */
+ {-2, MIN_PKT_SIZE, 0, false},
+ /* Packet too large */
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ /* Up to end of umem allowed */
+ {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
+ /* After umem ends */
+ {umem_size, MIN_PKT_SIZE, 0, false},
+ /* Straddle the end of umem */
+ {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 4K boundary */
+ {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 2K boundary */
+ {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a page boundary allowed */
+ pkts[7].valid = true;
+ }
+ if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+ /* Crossing a 2K frame size boundary not allowed */
+ pkts[8].valid = false;
+ }
+
+ if (test->ifobj_tx->shared_umem) {
+ pkts[4].offset += umem_size;
+ pkts[5].offset += umem_size;
+ pkts[6].offset += umem_size;
+ }
+
+ if (pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts)))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_drop(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ if (pkt_stream_receive_half(test))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_metadata_copy(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+ skel_tx->progs.xsk_xdp_populate_metadata,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+ test->ifobj_rx->use_metadata = true;
+
+ skel_rx->bss->count = 0;
+
+ return testapp_validate_traffic(test);
+}
+
+int testapp_xdp_shared_umem(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+ int ret;
+
+ test->total_steps = 1;
+ test->nb_sockets = 2;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+ skel_tx->progs.xsk_xdp_shared_umem,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
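+	/* Give each of the two sockets sharing the umem a stream consisting of
+	 * every other packet; the shared-umem XDP program is expected to
+	 * distribute the packets between the sockets accordingly.
+	 */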
+ if (pkt_stream_even_odd_sequence(test))
+ return TEST_FAILURE;
+
+ ret = testapp_validate_traffic(test);
+
+ release_even_odd_sequence(test);
+
+ return ret;
+}
+
+int testapp_poll_txq_tmout(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+	/* Create invalid frames by setting both the umem frame_size and the packet length to 2048 */
+ test->ifobj_tx->umem->frame_size = 2048;
+ if (pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048))
+ return TEST_FAILURE;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+}
+
+int testapp_poll_rxq_tmout(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
+int testapp_too_many_frags(struct test_spec *test)
+{
+ struct pkt *pkts;
+ u32 max_frags, i;
+ int ret = TEST_FAILURE;
+
+ if (test->mode == TEST_MODE_ZC) {
+ max_frags = test->ifobj_tx->xdp_zc_max_segs;
+ } else {
+ max_frags = get_max_skb_frags();
+ if (!max_frags) {
+ ksft_print_msg("Can't get MAX_SKB_FRAGS from system, using default (17)\n");
+ max_frags = 17;
+ }
+ max_frags += 1;
+ }
+
+ pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
+ if (!pkts)
+ return TEST_FAILURE;
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+
+ /* Valid packet for synch */
+ pkts[0].len = MIN_PKT_SIZE;
+ pkts[0].valid = true;
+
+ /* One valid packet with the max amount of frags */
+ for (i = 1; i < max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = true;
+ }
+ pkts[max_frags].options = 0;
+
+ /* An invalid packet with the max amount of frags but signals packet
+ * continues on the last frag
+ */
+ for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = false;
+ }
+
+ /* Valid packet for synch */
+ pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+ pkts[2 * max_frags + 1].valid = true;
+
+ if (pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2)) {
+ free(pkts);
+ return TEST_FAILURE;
+ }
+
+ ret = testapp_validate_traffic(test);
+ free(pkts);
+ return ret;
+}
+
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+ ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+ if (libbpf_get_error(ifobj->xdp_progs))
+ return libbpf_get_error(ifobj->xdp_progs);
+
+ return 0;
+}
+
+/* Simple test */
+static bool hugepages_present(void)
+{
+ size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ void *bufs;
+
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
+ if (bufs == MAP_FAILED)
+ return false;
+
+ mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ munmap(bufs, mmap_sz);
+ return true;
+}
+
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int err;
+
+ ifobj->func_ptr = func_ptr;
+
+ err = xsk_load_xdp_programs(ifobj);
+ if (err) {
+ ksft_print_msg("Error loading XDP program\n");
+ return err;
+ }
+
+ if (hugepages_present())
+ ifobj->unaligned_supp = true;
+
+ err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (err) {
+ ksft_print_msg("Error querying XDP capabilities\n");
+ return err;
+ }
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+ ifobj->multi_buff_supp = true;
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (query_opts.xdp_zc_max_segs > 1) {
+ ifobj->multi_buff_zc_supp = true;
+ ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+ } else {
+ ifobj->xdp_zc_max_segs = 0;
+ }
+ }
+
+ return 0;
+}
+
+int testapp_send_receive(struct test_spec *test)
+{
+ return testapp_validate_traffic(test);
+}
+
+int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ if (pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_rx(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_poll_tx(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_aligned_inv_desc(struct test_spec *test)
+{
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc(test);
+}
+
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+ u64 page_size, umem_size;
+
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+	/* This test exists to test descriptors that straddle the end of
+ * the UMEM but not a page.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+ return testapp_invalid_desc(test);
+}
+
+int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc_mb(test);
+}
+
+int testapp_xdp_metadata(struct test_spec *test)
+{
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ return testapp_xdp_metadata_copy(test);
+}
+
+int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+ test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+ test->ifobj_tx->xsk->batch_size = 1;
+ test->ifobj_rx->xsk->batch_size = 1;
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch size to hw_ring_size - 1 */
+ test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ return testapp_validate_traffic(test);
+}
+
+int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+ test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
+ test->ifobj_rx->umem->num_frames = max_descs;
+ test->ifobj_rx->umem->fill_size = max_descs;
+ test->ifobj_rx->umem->comp_size = max_descs;
+ test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+ * updating the Rx HW tail register.
+ */
+ test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ if (pkt_stream_replace(test, max_descs, MIN_PKT_SIZE)) {
+ clean_sockets(test, test->ifobj_tx);
+ clean_sockets(test, test->ifobj_rx);
+ clean_umem(test, test->ifobj_rx, test->ifobj_tx);
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ if (ret)
+ return TEST_FAILURE;
+
+ ret = pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+ if (ret)
+ return TEST_FAILURE;
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_print_msg("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+	/* Grow by (frag_size - last_frag_size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+int testapp_tx_queue_consumer(struct test_spec *test)
+{
+ int nr_packets;
+
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_print_msg("Can not run TX_QUEUE_CONSUMER test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ nr_packets = MAX_TX_BUDGET_DEFAULT + 1;
+ if (pkt_stream_replace(test, nr_packets, MIN_PKT_SIZE))
+ return TEST_FAILURE;
+ test->ifobj_tx->xsk->batch_size = nr_packets;
+ test->ifobj_tx->xsk->check_consumer = true;
+
+ return testapp_validate_traffic(test);
+}
+
+struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+ if (!ifobj->umem)
+ goto out_umem;
+
+ return ifobj;
+
+out_umem:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_xsk.h b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
new file mode 100644
index 000000000000..8fc78a057de0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_xsk.h
@@ -0,0 +1,298 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef TEST_XSK_H_
+#define TEST_XSK_H_
+
+#include <linux/ethtool.h>
+#include <linux/if_xdp.h>
+
+#include "../kselftest.h"
+#include "xsk.h"
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE 1
+#define TEST_SKIP 2
+
+#define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_PKT_SIZE 1518
+#define MAX_INTERFACE_NAME_CHARS 16
+#define MAX_TEST_NAME_SIZE 48
+#define SOCK_RECONF_CTR 10
+#define USLEEP_MAX 10000
+
+extern bool opt_verbose;
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+static inline u32 ceil_u32(u32 a, u32 b)
+{
+ return (a + b - 1) / b;
+}
+
+static inline u64 ceil_u64(u64 a, u64 b)
+{
+ return (a + b - 1) / b;
+}
+
+/* Simple test */
+enum test_mode {
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_ZC,
+ TEST_MODE_ALL
+};
+
+struct ifobject;
+struct test_spec;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
+
+struct xsk_socket_info {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_prod tx;
+ struct xsk_umem_info *umem;
+ struct xsk_socket *xsk;
+ struct pkt_stream *pkt_stream;
+ u32 outstanding_tx;
+ u32 rxqsize;
+ u32 batch_size;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+ bool check_consumer;
+};
+
+int kick_rx(struct xsk_socket_info *xsk);
+int kick_tx(struct xsk_socket_info *xsk);
+
+struct xsk_umem_info {
+ struct xsk_ring_prod fq;
+ struct xsk_ring_cons cq;
+ struct xsk_umem *umem;
+ u64 next_buffer;
+ u32 num_frames;
+ u32 frame_headroom;
+ void *buffer;
+ u32 frame_size;
+ u32 base_addr;
+ u32 fill_size;
+ u32 comp_size;
+ bool unaligned_mode;
+};
+
+struct set_hw_ring {
+ u32 default_tx;
+ u32 default_rx;
+};
+
+int hw_ring_size_reset(struct ifobject *ifobj);
+
+struct ifobject {
+ char ifname[MAX_INTERFACE_NAME_CHARS];
+ struct xsk_socket_info *xsk;
+ struct xsk_socket_info *xsk_arr;
+ struct xsk_umem_info *umem;
+ thread_func_t func_ptr;
+ validation_func_t validation_func;
+ struct xsk_xdp_progs *xdp_progs;
+ struct bpf_map *xskmap;
+ struct bpf_program *xdp_prog;
+ struct ethtool_ringparam ring;
+ struct set_hw_ring set_ring;
+ enum test_mode mode;
+ int ifindex;
+ int mtu;
+ u32 bind_flags;
+ u32 xdp_zc_max_segs;
+ bool tx_on;
+ bool rx_on;
+ bool use_poll;
+ bool busy_poll;
+ bool use_fill_ring;
+ bool release_rx;
+ bool shared_umem;
+ bool use_metadata;
+ bool unaligned_supp;
+ bool multi_buff_supp;
+ bool multi_buff_zc_supp;
+ bool hw_ring_size_supp;
+};
+struct ifobject *ifobject_create(void);
+void ifobject_delete(struct ifobject *ifobj);
+int init_iface(struct ifobject *ifobj, thread_func_t func_ptr);
+
+int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer, u64 size);
+int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared);
+
+
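+/* One entry in a packet stream. The offset is applied relative to the buffer
+ * allocated for the packet; a negative offset makes the packet start before
+ * that buffer, e.g. to straddle a boundary on purpose.
+ */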
+struct pkt {
+ int offset;
+ u32 len;
+ u32 pkt_nb;
+ bool valid;
+ u16 options;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ u32 current_pkt_nb;
+ struct pkt *pkts;
+ u32 max_pkt_len;
+ u32 nb_rx_pkts;
+ u32 nb_valid_entries;
+ bool verbatim;
+};
+
+static inline bool pkt_continues(u32 options)
+{
+ return options & XDP_PKT_CONTD;
+}
+
+struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len);
+void pkt_stream_delete(struct pkt_stream *pkt_stream);
+void pkt_stream_reset(struct pkt_stream *pkt_stream);
+void pkt_stream_restore_default(struct test_spec *test);
+
+struct test_spec {
+ struct ifobject *ifobj_tx;
+ struct ifobject *ifobj_rx;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct pkt_stream *rx_pkt_stream_default;
+ struct bpf_program *xdp_prog_rx;
+ struct bpf_program *xdp_prog_tx;
+ struct bpf_map *xskmap_rx;
+ struct bpf_map *xskmap_tx;
+ test_func_t test_func;
+ int mtu;
+ u16 total_steps;
+ u16 current_step;
+ u16 nb_sockets;
+ bool fail;
+ bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
+ enum test_mode mode;
+ char name[MAX_TEST_NAME_SIZE];
+};
+
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+static inline char *mode_string(struct test_spec *test)
+{
+ switch (test->mode) {
+ case TEST_MODE_SKB:
+ return "SKB";
+ case TEST_MODE_DRV:
+ return "DRV";
+ case TEST_MODE_ZC:
+ return "ZC";
+ default:
+ return "BOGUS";
+ }
+}
+
+void test_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run);
+
+int testapp_adjust_tail_grow(struct test_spec *test);
+int testapp_adjust_tail_grow_mb(struct test_spec *test);
+int testapp_adjust_tail_shrink(struct test_spec *test);
+int testapp_adjust_tail_shrink_mb(struct test_spec *test);
+int testapp_aligned_inv_desc(struct test_spec *test);
+int testapp_aligned_inv_desc_2k_frame(struct test_spec *test);
+int testapp_aligned_inv_desc_mb(struct test_spec *test);
+int testapp_bidirectional(struct test_spec *test);
+int testapp_headroom(struct test_spec *test);
+int testapp_hw_sw_max_ring_size(struct test_spec *test);
+int testapp_hw_sw_min_ring_size(struct test_spec *test);
+int testapp_poll_rx(struct test_spec *test);
+int testapp_poll_rxq_tmout(struct test_spec *test);
+int testapp_poll_tx(struct test_spec *test);
+int testapp_poll_txq_tmout(struct test_spec *test);
+int testapp_send_receive(struct test_spec *test);
+int testapp_send_receive_2k_frame(struct test_spec *test);
+int testapp_send_receive_mb(struct test_spec *test);
+int testapp_send_receive_unaligned(struct test_spec *test);
+int testapp_send_receive_unaligned_mb(struct test_spec *test);
+int testapp_single_pkt(struct test_spec *test);
+int testapp_stats_fill_empty(struct test_spec *test);
+int testapp_stats_rx_dropped(struct test_spec *test);
+int testapp_stats_tx_invalid_descs(struct test_spec *test);
+int testapp_stats_rx_full(struct test_spec *test);
+int testapp_teardown(struct test_spec *test);
+int testapp_too_many_frags(struct test_spec *test);
+int testapp_tx_queue_consumer(struct test_spec *test);
+int testapp_unaligned_inv_desc(struct test_spec *test);
+int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test);
+int testapp_unaligned_inv_desc_mb(struct test_spec *test);
+int testapp_xdp_drop(struct test_spec *test);
+int testapp_xdp_metadata(struct test_spec *test);
+int testapp_xdp_metadata_mb(struct test_spec *test);
+int testapp_xdp_prog_cleanup(struct test_spec *test);
+int testapp_xdp_shared_umem(struct test_spec *test);
+
+void *worker_testapp_validate_rx(void *arg);
+void *worker_testapp_validate_tx(void *arg);
+
+static const struct test_spec tests[] = {
+ {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+ {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+ {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+ {.name = "POLL_RX", .test_func = testapp_poll_rx},
+ {.name = "POLL_TX", .test_func = testapp_poll_tx},
+ {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+ {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+ {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+ {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+ {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+ {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+ {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+ {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+ {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+ {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+ {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+ {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+ {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+ {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+ {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+ {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+ {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "TX_QUEUE_CONSUMER", .test_func = testapp_tx_queue_consumer},
+ };
+
+static const struct test_spec ci_skip_tests[] = {
+ /* Flaky tests */
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
+ {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+ /* Tests with huge page dependency */
+ {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+ {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+ {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+ .test_func = testapp_unaligned_inv_desc_4001_frame},
+ {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+ .test_func = testapp_send_receive_unaligned_mb},
+ {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+ /* Test with HW ring size dependency */
+ {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+ {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ /* Too long test */
+ {.name = "TEARDOWN", .test_func = testapp_teardown},
+};
+
+
+#endif /* TEST_XSK_H_ */
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index d66687f1ee6a..34f9ccce2602 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -3,6 +3,7 @@
#include <test_progs.h>
#include "timer.skel.h"
#include "timer_failure.skel.h"
+#include "timer_interrupt.skel.h"
#define NUM_THR 8
@@ -86,6 +87,10 @@ void serial_test_timer(void)
int err;
timer_skel = timer__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
return;
@@ -95,3 +100,36 @@ void serial_test_timer(void)
RUN_TESTS(timer_failure);
}
+
+void test_timer_interrupt(void)
+{
+ struct timer_interrupt *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ skel = timer_interrupt__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
+ if (!ASSERT_OK_PTR(skel, "timer_interrupt__open_and_load"))
+ return;
+
+ err = timer_interrupt__attach(skel);
+ if (!ASSERT_OK(err, "timer_interrupt__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_timer_interrupt);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+
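+	/* Give the bpf timer callback a chance to run before checking the
+	 * flags it records about interrupt context.
+	 */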
+ usleep(50);
+
+ ASSERT_EQ(skel->bss->in_interrupt, 0, "in_interrupt");
+ if (skel->bss->preempt_count)
+ ASSERT_NEQ(skel->bss->in_interrupt_cb, 0, "in_interrupt_cb");
+
+out:
+ timer_interrupt__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
index f74b82305da8..b841597c8a3a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_crash.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -12,6 +12,10 @@ static void test_timer_crash_mode(int mode)
struct timer_crash *skel;
skel = timer_crash__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
return;
skel->bss->pid = getpid();
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
index 1a2f99596916..eb303fa1e09a 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -59,6 +59,10 @@ void test_timer_lockup(void)
}
skel = timer_lockup__open_and_load();
+ if (!skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index 9ff7843909e7..c930c7d7105b 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -65,6 +65,10 @@ void serial_test_timer_mim(void)
goto cleanup;
timer_skel = timer_mim__open_and_load();
+ if (!timer_skel && errno == EOPNOTSUPP) {
+ test__skip();
+ return;
+ }
if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
index c3ab9b6fb069..b81dde283052 100644
--- a/tools/testing/selftests/bpf/prog_tests/token.c
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -19,6 +19,7 @@
#include "priv_prog.skel.h"
#include "dummy_st_ops_success.skel.h"
#include "token_lsm.skel.h"
+#include "priv_freplace_prog.skel.h"
static inline int sys_mount(const char *dev_name, const char *dir_name,
const char *type, unsigned long flags,
@@ -114,7 +115,7 @@ static int create_bpffs_fd(void)
static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
{
- int mnt_fd, err;
+ int err;
/* set up token delegation mount options */
err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
@@ -135,12 +136,7 @@ static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
if (err < 0)
return -errno;
- /* create O_PATH fd for detached mount */
- mnt_fd = sys_fsmount(fs_fd, 0, 0);
- if (err < 0)
- return -errno;
-
- return mnt_fd;
+ return 0;
}
/* send FD over Unix domain (AF_UNIX) socket */
@@ -286,6 +282,7 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba
{
int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
struct token_lsm *lsm_skel = NULL;
+ char one;
/* load and attach LSM "policy" before we go into unpriv userns */
lsm_skel = token_lsm__open_and_load();
@@ -332,13 +329,19 @@ static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callba
err = sendfd(sock_fd, fs_fd);
if (!ASSERT_OK(err, "send_fs_fd"))
goto cleanup;
- zclose(fs_fd);
+
+	/* wait until the parent has read the fd, done the fsconfig() calls
+	 * and signalled us that it is done
+ */
+ err = read(sock_fd, &one, sizeof(one));
+ if (!ASSERT_GE(err, 0, "read_one"))
+ goto cleanup;
/* avoid mucking around with mount namespaces and mounting at
- * well-known path, just get detach-mounted BPF FS fd back from parent
+ * well-known path, just create O_PATH fd for detached mount
*/
- err = recvfd(sock_fd, &mnt_fd);
- if (!ASSERT_OK(err, "recv_mnt_fd"))
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_OK_FD(mnt_fd, "mnt_fd"))
goto cleanup;
/* try to fspick() BPF FS and try to add some delegation options */
@@ -428,24 +431,24 @@ again:
static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
{
- int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
+ int fs_fd = -1, token_fd = -1, err;
+ char one = 1;
err = recvfd(sock_fd, &fs_fd);
if (!ASSERT_OK(err, "recv_bpffs_fd"))
goto cleanup;
- mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
- if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(err, 0, "materialize_bpffs_fd")) {
err = -EINVAL;
goto cleanup;
}
- zclose(fs_fd);
- /* pass BPF FS context object to parent */
- err = sendfd(sock_fd, mnt_fd);
- if (!ASSERT_OK(err, "send_mnt_fd"))
+ /* notify the child that we did the fsconfig() calls and it can proceed. */
+ err = write(sock_fd, &one, sizeof(one));
+ if (!ASSERT_EQ(err, sizeof(one), "send_one"))
goto cleanup;
- zclose(mnt_fd);
+ zclose(fs_fd);
/* receive BPF token FD back from child for some extra tests */
err = recvfd(sock_fd, &token_fd);
@@ -458,7 +461,6 @@ static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
cleanup:
zclose(sock_fd);
zclose(fs_fd);
- zclose(mnt_fd);
zclose(token_fd);
if (child_pid > 0)
@@ -788,6 +790,84 @@ static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
return 0;
}
+static int userns_obj_priv_freplace_setup(int mnt_fd, struct priv_freplace_prog **fr_skel,
+ struct priv_prog **skel, int *tgt_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err;
+ char buf[256];
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ *skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(*skel, "priv_prog__open_opts"))
+ return -EINVAL;
+ err = priv_prog__load(*skel);
+ if (!ASSERT_OK(err, "priv_prog__load"))
+ return -EINVAL;
+
+ *fr_skel = priv_freplace_prog__open_opts(&opts);
+	if (!ASSERT_OK_PTR(*fr_skel, "priv_freplace_prog__open_opts"))
+ return -EINVAL;
+
+ *tgt_fd = bpf_program__fd((*skel)->progs.xdp_prog1);
+ return 0;
+}
+
+/* Verify that freplace works from user namespace, because bpf token is loaded
+ * in bpf_object__prepare
+ */
+static int userns_obj_priv_freplace_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_object__prepare(fr_skel->obj);
+ if (!ASSERT_OK(err, "freplace__prepare"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = priv_freplace_prog__load(fr_skel);
+ ASSERT_OK(err, "priv_freplace_prog__load");
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
+/* Verify that freplace fails to set the attach target from a user namespace without a bpf token */
+static int userns_obj_priv_freplace_prog_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ struct priv_freplace_prog *fr_skel = NULL;
+ struct priv_prog *skel = NULL;
+ int err, tgt_fd;
+
+ err = userns_obj_priv_freplace_setup(mnt_fd, &fr_skel, &skel, &tgt_fd);
+ if (!ASSERT_OK(err, "setup"))
+ goto out;
+
+ err = bpf_program__set_attach_target(fr_skel->progs.new_xdp_prog2, tgt_fd, "xdp_prog1");
+ if (ASSERT_ERR(err, "attach fails"))
+ err = 0;
+ else
+ err = -EINVAL;
+
+out:
+ priv_freplace_prog__destroy(fr_skel);
+ priv_prog__destroy(skel);
+ return err;
+}
+
/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
* which should cause struct_ops application to fail, as BTF won't be uploaded
* into the kernel, even if STRUCT_OPS programs themselves are allowed
@@ -967,6 +1047,41 @@ err_out:
#define bit(n) (1ULL << (n))
+static int userns_bpf_token_info(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ int err, token_fd = -1;
+ struct bpf_token_info info;
+ u32 len = sizeof(struct bpf_token_info);
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ memset(&info, 0, len);
+ err = bpf_obj_get_info_by_fd(token_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_obj_get_token_info"))
+ goto cleanup;
+ if (!ASSERT_EQ(info.allowed_cmds, bit(BPF_MAP_CREATE), "token_info_cmds_map_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(info.allowed_progs, bit(BPF_PROG_TYPE_XDP), "token_info_progs_xdp")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+	/* BPF_PROG_TYPE_EXT must not be allowed by the token */
+	if (!ASSERT_NEQ(info.allowed_progs, bit(BPF_PROG_TYPE_EXT), "token_info_progs_ext"))
+ err = -EINVAL;
+
+cleanup:
+ zclose(token_fd);
+ return err;
+}
+
void test_token(void)
{
if (test__start_subtest("map_token")) {
@@ -1004,12 +1119,28 @@ void test_token(void)
if (test__start_subtest("obj_priv_prog")) {
struct bpffs_opts opts = {
.cmds = bit(BPF_PROG_LOAD),
- .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
.attachs = ~0ULL,
};
subtest_userns(&opts, userns_obj_priv_prog);
}
+ if (test__start_subtest("obj_priv_freplace_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog);
+ }
+ if (test__start_subtest("obj_priv_freplace_prog_fail")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_PROG_LOAD) | bit(BPF_BTF_GET_FD_BY_ID),
+ .progs = bit(BPF_PROG_TYPE_EXT) | bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+ subtest_userns(&opts, userns_obj_priv_freplace_prog_fail);
+ }
if (test__start_subtest("obj_priv_btf_fail")) {
struct bpffs_opts opts = {
/* disallow BTF loading */
@@ -1054,4 +1185,13 @@ void test_token(void)
subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
}
+ if (test__start_subtest("bpf_token_info")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .progs = bit(BPF_PROG_TYPE_XDP),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_bpf_token_info);
+ }
}
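For reference, a minimal stand-alone sketch of the token-info query exercised by
userns_bpf_token_info() above (assuming a delegated BPF FS fd in mnt_fd and a UAPI
header recent enough to provide struct bpf_token_info; an illustration, not part of
the patch):

	#include <stdio.h>
	#include <unistd.h>
	#include <bpf/bpf.h>

	/* Create a token from a delegated BPF FS mount and read back its
	 * delegation masks via BPF_OBJ_GET_INFO_BY_FD, as the test does.
	 */
	static int dump_token_info(int mnt_fd)
	{
		struct bpf_token_info info = {};
		__u32 len = sizeof(info);
		int token_fd, err;

		token_fd = bpf_token_create(mnt_fd, NULL);
		if (token_fd < 0)
			return token_fd;

		err = bpf_obj_get_info_by_fd(token_fd, &info, &len);
		if (!err)
			printf("allowed_cmds 0x%llx allowed_progs 0x%llx\n",
			       (unsigned long long)info.allowed_cmds,
			       (unsigned long long)info.allowed_progs);

		close(token_fd);
		return err;
	}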
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
index a222df765bc3..10e231965589 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -28,10 +28,62 @@ out:
tracing_failure__destroy(skel);
}
+static void test_tracing_fail_prog(const char *prog_name, const char *exp_msg)
+{
+ struct tracing_failure *skel;
+ struct bpf_program *prog;
+ char log_buf[256];
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+
+ bpf_program__set_autoload(prog, true);
+ bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_ERR(err, "tracing_failure__load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, exp_msg, "log_buf");
+out:
+ tracing_failure__destroy(skel);
+}
+
+static void test_tracing_deny(void)
+{
+ int btf_id;
+
+ /* __rcu_read_lock depends on CONFIG_PREEMPT_RCU */
+ btf_id = libbpf_find_vmlinux_btf_id("__rcu_read_lock", BPF_TRACE_FENTRY);
+ if (btf_id <= 0) {
+ test__skip();
+ return;
+ }
+
+ test_tracing_fail_prog("tracing_deny",
+ "Attaching tracing programs to function '__rcu_read_lock' is rejected.");
+}
+
+static void test_fexit_noreturns(void)
+{
+ test_tracing_fail_prog("fexit_noreturns",
+ "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected.");
+}
+
void test_tracing_failure(void)
{
if (test__start_subtest("bpf_spin_lock"))
test_bpf_spin_lock(true);
if (test__start_subtest("bpf_spin_unlock"))
test_bpf_spin_lock(false);
+ if (test__start_subtest("tracing_deny"))
+ test_tracing_deny();
+ if (test__start_subtest("fexit_noreturns"))
+ test_fexit_noreturns();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index 19e68d4b3532..6f8c0bfb0415 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -112,10 +112,39 @@ destroy_skel:
tracing_struct_many_args__destroy(skel);
}
+static void test_union_args(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->ut1_a_a, 1, "ut1:a.arg.a");
+ ASSERT_EQ(skel->bss->ut1_b, 4, "ut1:b");
+ ASSERT_EQ(skel->bss->ut1_c, 5, "ut1:c");
+
+ ASSERT_EQ(skel->bss->ut2_a, 6, "ut2:a");
+ ASSERT_EQ(skel->bss->ut2_b_a, 2, "ut2:b.arg.a");
+ ASSERT_EQ(skel->bss->ut2_b_b, 3, "ut2:b.arg.b");
+
+out:
+ tracing_struct__destroy(skel);
+}
+
void test_tracing_struct(void)
{
if (test__start_subtest("struct_args"))
test_struct_args();
if (test__start_subtest("struct_many_args"))
test_struct_many_args();
+ if (test__start_subtest("union_args"))
+ test_union_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
index cf3e0e7a64fa..86404476c1da 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2023 Hengqi Chen */
#include <test_progs.h>
+#include <asm/ptrace.h>
#include "test_uprobe.skel.h"
static FILE *urand_spawn(int *pid)
@@ -33,7 +34,7 @@ static int urand_trigger(FILE **urand_pipe)
return exit_code;
}
-void test_uprobe(void)
+static void test_uprobe_attach(void)
{
LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
struct test_uprobe *skel;
@@ -93,3 +94,156 @@ cleanup:
pclose(urand_pipe);
test_uprobe__destroy(skel);
}
+
+#ifdef __x86_64__
+__naked __maybe_unused unsigned long uprobe_regs_change_trigger(void)
+{
+ asm volatile (
+ "ret\n"
+ );
+}
+
+static __naked void uprobe_regs_change(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uprobe_regs_change_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+ "ret\n"
+ );
+}
+
+static void regs_common(void)
+{
+ struct pt_regs before = {}, after = {}, expected = {
+ .rax = 0xc0ffe,
+ .rcx = 0xbad,
+ .rdx = 0xdead,
+ .r8 = 0x8,
+ .r9 = 0x9,
+ .r10 = 0x10,
+ .r11 = 0x11,
+ .rdi = 0x12,
+ .rsi = 0x13,
+ };
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->regs = expected;
+
+ uprobe_opts.func_name = "uprobe_regs_change_trigger";
+ skel->links.test_regs_change = bpf_program__attach_uprobe_opts(skel->progs.test_regs_change,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ uprobe_regs_change(&before, &after);
+
+ ASSERT_EQ(after.rax, expected.rax, "ax");
+ ASSERT_EQ(after.rcx, expected.rcx, "cx");
+ ASSERT_EQ(after.rdx, expected.rdx, "dx");
+ ASSERT_EQ(after.r8, expected.r8, "r8");
+ ASSERT_EQ(after.r9, expected.r9, "r9");
+ ASSERT_EQ(after.r10, expected.r10, "r10");
+ ASSERT_EQ(after.r11, expected.r11, "r11");
+ ASSERT_EQ(after.rdi, expected.rdi, "rdi");
+ ASSERT_EQ(after.rsi, expected.rsi, "rsi");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static noinline unsigned long uprobe_regs_change_ip_1(void)
+{
+ return 0xc0ffee;
+}
+
+static noinline unsigned long uprobe_regs_change_ip_2(void)
+{
+ return 0xdeadbeef;
+}
+
+static void regs_ip(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ unsigned long ret;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_pid = getpid();
+ skel->bss->ip = (unsigned long) uprobe_regs_change_ip_2;
+
+ uprobe_opts.func_name = "uprobe_regs_change_ip_1";
+ skel->links.test_regs_change_ip = bpf_program__attach_uprobe_opts(
+ skel->progs.test_regs_change_ip,
+ -1,
+ "/proc/self/exe",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test_regs_change_ip, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ ret = uprobe_regs_change_ip_1();
+ ASSERT_EQ(ret, 0xdeadbeef, "ret");
+
+cleanup:
+ test_uprobe__destroy(skel);
+}
+
+static void test_uprobe_regs_change(void)
+{
+ if (test__start_subtest("regs_change_common"))
+ regs_common();
+ if (test__start_subtest("regs_change_ip"))
+ regs_ip();
+}
+#else
+static void test_uprobe_regs_change(void) { }
+#endif
+
+void test_uprobe(void)
+{
+ if (test__start_subtest("attach"))
+ test_uprobe_attach();
+ test_uprobe_regs_change();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
index c397336fe1ed..955a37751b52 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -8,22 +8,31 @@
#include <asm/ptrace.h>
#include <linux/compiler.h>
#include <linux/stringify.h>
+#include <linux/kernel.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
#include <asm/prctl.h>
#include "uprobe_syscall.skel.h"
#include "uprobe_syscall_executed.skel.h"
+#include "bpf/libbpf_internal.h"
-__naked unsigned long uretprobe_regs_trigger(void)
+#define USDT_NOP .byte 0x0f, 0x1f, 0x44, 0x00, 0x00
+#include "usdt.h"
+
+#pragma GCC diagnostic ignored "-Wattributes"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked unsigned long uprobe_regs_trigger(void)
{
asm volatile (
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00\n" /* nop5 */
"movq $0xdeadbeef, %rax\n"
"ret\n"
);
}
-__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
+__naked void uprobe_regs(struct pt_regs *before, struct pt_regs *after)
{
asm volatile (
"movq %r15, 0(%rdi)\n"
@@ -44,15 +53,17 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
"movq $0, 120(%rdi)\n" /* orig_rax */
"movq $0, 128(%rdi)\n" /* rip */
"movq $0, 136(%rdi)\n" /* cs */
+ "pushq %rax\n"
"pushf\n"
"pop %rax\n"
"movq %rax, 144(%rdi)\n" /* eflags */
+ "pop %rax\n"
"movq %rsp, 152(%rdi)\n" /* rsp */
"movq $0, 160(%rdi)\n" /* ss */
/* save 2nd argument */
"pushq %rsi\n"
- "call uretprobe_regs_trigger\n"
+ "call uprobe_regs_trigger\n"
/* save return value and load 2nd argument pointer to rax */
"pushq %rax\n"
@@ -92,25 +103,37 @@ __naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
);
}
-static void test_uretprobe_regs_equal(void)
+static void test_uprobe_regs_equal(bool retprobe)
{
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = retprobe,
+ );
struct uprobe_syscall *skel = NULL;
struct pt_regs before = {}, after = {};
unsigned long *pb = (unsigned long *) &before;
unsigned long *pa = (unsigned long *) &after;
unsigned long *pp;
+ unsigned long offset;
unsigned int i, cnt;
- int err;
+
+ offset = get_uprobe_offset(&uprobe_regs_trigger);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return;
skel = uprobe_syscall__open_and_load();
if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load"))
goto cleanup;
- err = uprobe_syscall__attach(skel);
- if (!ASSERT_OK(err, "uprobe_syscall__attach"))
+ skel->links.probe = bpf_program__attach_uprobe_opts(skel->progs.probe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.probe, "bpf_program__attach_uprobe_opts"))
goto cleanup;
- uretprobe_regs(&before, &after);
+ /* make sure uprobe gets optimized */
+ if (!retprobe)
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
pp = (unsigned long *) &skel->bss->regs;
cnt = sizeof(before)/sizeof(*pb);
@@ -119,7 +142,7 @@ static void test_uretprobe_regs_equal(void)
unsigned int offset = i * sizeof(unsigned long);
/*
- * Check register before and after uretprobe_regs_trigger call
+ * Check register before and after uprobe_regs_trigger call
* that triggers the uretprobe.
*/
switch (offset) {
@@ -133,7 +156,7 @@ static void test_uretprobe_regs_equal(void)
/*
* Check register seen from bpf program and register after
- * uretprobe_regs_trigger call
+ * uprobe_regs_trigger call (with rax exception, check below).
*/
switch (offset) {
/*
@@ -146,6 +169,15 @@ static void test_uretprobe_regs_equal(void)
case offsetof(struct pt_regs, rsp):
case offsetof(struct pt_regs, ss):
break;
+ /*
+ * uprobe does not see return value in rax, it needs to see the
+ * original (before) rax value
+ */
+ case offsetof(struct pt_regs, rax):
+ if (!retprobe) {
+ ASSERT_EQ(pp[i], pb[i], "uprobe rax prog-before value check");
+ break;
+ }
default:
if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check"))
fprintf(stdout, "failed register offset %u\n", offset);
@@ -175,7 +207,7 @@ static int write_bpf_testmod_uprobe(unsigned long offset)
return ret != n ? (int) ret : 0;
}
-static void test_uretprobe_regs_change(void)
+static void test_regs_change(void)
{
struct pt_regs before = {}, after = {};
unsigned long *pb = (unsigned long *) &before;
@@ -183,13 +215,16 @@ static void test_uretprobe_regs_change(void)
unsigned long cnt = sizeof(before)/sizeof(*pb);
unsigned int i, err, offset;
- offset = get_uprobe_offset(uretprobe_regs_trigger);
+ offset = get_uprobe_offset(uprobe_regs_trigger);
err = write_bpf_testmod_uprobe(offset);
if (!ASSERT_OK(err, "register_uprobe"))
return;
- uretprobe_regs(&before, &after);
+ /* make sure uprobe gets optimized */
+ uprobe_regs_trigger();
+
+ uprobe_regs(&before, &after);
err = write_bpf_testmod_uprobe(0);
if (!ASSERT_OK(err, "unregister_uprobe"))
@@ -251,7 +286,8 @@ static void test_uretprobe_syscall_call(void)
.retprobe = true,
);
struct uprobe_syscall_executed *skel;
- int pid, status, err, go[2], c;
+ int pid, status, err, go[2], c = 0;
+ struct bpf_link *link;
if (!ASSERT_OK(pipe(go), "pipe"))
return;
@@ -277,11 +313,14 @@ static void test_uretprobe_syscall_call(void)
_exit(0);
}
- skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid,
- "/proc/self/exe",
- "uretprobe_syscall_call", &opts);
- if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi"))
+ skel->bss->pid = pid;
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ pid, "/proc/self/exe",
+ "uretprobe_syscall_call", &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
goto cleanup;
+ skel->links.test_uretprobe_multi = link;
/* kick the child */
write(go[1], &c, 1);
@@ -301,6 +340,256 @@ cleanup:
close(go[0]);
}
+#define TRAMP "[uprobes-trampoline]"
+
+__attribute__((aligned(16)))
+__nocf_check __weak __naked void uprobe_test(void)
+{
+ asm volatile (" \n"
+ ".byte 0x0f, 0x1f, 0x44, 0x00, 0x00 \n"
+ "ret \n"
+ );
+}
+
+__attribute__((aligned(16)))
+__nocf_check __weak void usdt_test(void)
+{
+ USDT(optimized_uprobe, usdt);
+}
+
+static int find_uprobes_trampoline(void *tramp_addr)
+{
+ void *start, *end;
+ char line[128];
+ int ret = -1;
+ FILE *maps;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ fprintf(stderr, "cannot open maps\n");
+ return -1;
+ }
+
+ while (fgets(line, sizeof(line), maps)) {
+ int m = -1;
+
+ /* We care only about private r-x mappings. */
+ if (sscanf(line, "%p-%p r-xp %*x %*x:%*x %*u %n", &start, &end, &m) != 2)
+ continue;
+ if (m < 0)
+ continue;
+ if (!strncmp(&line[m], TRAMP, sizeof(TRAMP)-1) && (start == tramp_addr)) {
+ ret = 0;
+ break;
+ }
+ }
+
+ fclose(maps);
+ return ret;
+}
+
+static unsigned char nop5[5] = { 0x0f, 0x1f, 0x44, 0x00, 0x00 };
+
+static void *find_nop5(void *fn)
+{
+ int i;
+
+ for (i = 0; i < 10; i++) {
+ if (!memcmp(nop5, fn + i, 5))
+ return fn + i;
+ }
+ return NULL;
+}
+
+typedef void (__attribute__((nocf_check)) *trigger_t)(void);
+
+static void *check_attach(struct uprobe_syscall_executed *skel, trigger_t trigger,
+ void *addr, int executed)
+{
+ struct __arch_relative_insn {
+ __u8 op;
+ __s32 raddr;
+ } __packed *call;
+ void *tramp = NULL;
+
+	/* The uprobe gets optimized after the first trigger, so trigger it twice. */
+ trigger();
+ trigger();
+
+ /* Make sure bpf program got executed.. */
+ ASSERT_EQ(skel->bss->executed, executed, "executed");
+
+ /* .. and check the trampoline is as expected. */
+ call = (struct __arch_relative_insn *) addr;
+ tramp = (void *) (call + 1) + call->raddr;
+ ASSERT_EQ(call->op, 0xe8, "call");
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+
+ return tramp;
+}
+
+static void check_detach(void *addr, void *tramp)
+{
+ /* [uprobes_trampoline] stays after detach */
+ ASSERT_OK(find_uprobes_trampoline(tramp), "uprobes_trampoline");
+ ASSERT_OK(memcmp(addr, nop5, 5), "nop5");
+}
+
+static void check(struct uprobe_syscall_executed *skel, struct bpf_link *link,
+ trigger_t trigger, void *addr, int executed)
+{
+ void *tramp;
+
+ tramp = check_attach(skel, trigger, addr, executed);
+ bpf_link__destroy(link);
+ check_detach(addr, tramp);
+}
+
+static void test_uprobe_legacy(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ /* uprobe */
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe */
+ skel->bss->executed = 0;
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.test_uretprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_opts"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_multi(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ /* uprobe.multi */
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+ /* uretprobe.multi */
+ skel->bss->executed = 0;
+ opts.retprobe = true;
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uretprobe_multi,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_session(void)
+{
+ struct uprobe_syscall_executed *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .session = true,
+ );
+ struct bpf_link *link;
+ unsigned long offset;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ goto cleanup;
+
+ opts.offsets = &offset;
+ opts.cnt = 1;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.test_uprobe_session,
+ 0, "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ check(skel, link, uprobe_test, uprobe_test, 4);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
+static void test_uprobe_usdt(void)
+{
+ struct uprobe_syscall_executed *skel;
+ struct bpf_link *link;
+ void *addr;
+
+ errno = 0;
+ addr = find_nop5(usdt_test);
+ if (!ASSERT_OK_PTR(addr, "find_nop5"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ link = bpf_program__attach_usdt(skel->progs.test_usdt,
+ -1 /* all PIDs */, "/proc/self/exe",
+ "optimized_uprobe", "usdt", NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ check(skel, link, usdt_test, addr, 2);
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+}
+
/*
* Borrowed from tools/testing/selftests/x86/test_shadow_stack.c.
*
@@ -343,43 +632,172 @@ static void test_uretprobe_shadow_stack(void)
return;
}
- /* Run all of the uretprobe tests. */
- test_uretprobe_regs_equal();
- test_uretprobe_regs_change();
+ /* Run all the tests with shadow stack in place. */
+
+ test_uprobe_regs_equal(false);
+ test_uprobe_regs_equal(true);
test_uretprobe_syscall_call();
+ test_uprobe_legacy();
+ test_uprobe_multi();
+ test_uprobe_session();
+ test_uprobe_usdt();
+
+ test_regs_change();
+
ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
}
-#else
-static void test_uretprobe_regs_equal(void)
+
+static volatile bool race_stop;
+
+static USDT_DEFINE_SEMA(race);
+
+static void *worker_trigger(void *arg)
{
- test__skip();
+ unsigned long rounds = 0;
+
+ while (!race_stop) {
+ uprobe_test();
+ rounds++;
+ }
+
+ printf("tid %ld trigger rounds: %lu\n", sys_gettid(), rounds);
+ return NULL;
}
-static void test_uretprobe_regs_change(void)
+static void *worker_attach(void *arg)
{
- test__skip();
+ LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct uprobe_syscall_executed *skel;
+ unsigned long rounds = 0, offset;
+ const char *sema[2] = {
+ __stringify(USDT_SEMA(race)),
+ NULL,
+ };
+ unsigned long *ref;
+ int err;
+
+ offset = get_uprobe_offset(&uprobe_test);
+ if (!ASSERT_GE(offset, 0, "get_uprobe_offset"))
+ return NULL;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 1, (const char **) &sema, &ref, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_sema"))
+ return NULL;
+
+ opts.ref_ctr_offset = *ref;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ while (!race_stop) {
+ skel->links.test_uprobe = bpf_program__attach_uprobe_opts(skel->progs.test_uprobe,
+ 0, "/proc/self/exe", offset, &opts);
+ if (!ASSERT_OK_PTR(skel->links.test_uprobe, "bpf_program__attach_uprobe_opts"))
+ break;
+
+ bpf_link__destroy(skel->links.test_uprobe);
+ skel->links.test_uprobe = NULL;
+ rounds++;
+ }
+
+ printf("tid %ld attach rounds: %lu hits: %d\n", sys_gettid(), rounds, skel->bss->executed);
+ uprobe_syscall_executed__destroy(skel);
+ free(ref);
+ return NULL;
}
-static void test_uretprobe_syscall_call(void)
+static useconds_t race_msec(void)
{
- test__skip();
+ char *env;
+
+ env = getenv("BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC");
+ if (env)
+ return atoi(env);
+
+ /* default duration is 500ms */
+ return 500;
}
-static void test_uretprobe_shadow_stack(void)
+static void test_uprobe_race(void)
{
- test__skip();
+ int err, i, nr_threads;
+ pthread_t *threads;
+
+ nr_threads = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(nr_threads, 0, "libbpf_num_possible_cpus"))
+ return;
+ nr_threads = max(2, nr_threads);
+
+ threads = alloca(sizeof(*threads) * nr_threads);
+	if (!ASSERT_OK_PTR(threads, "alloca"))
+ return;
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&threads[i], NULL, i % 2 ? worker_trigger : worker_attach,
+ NULL);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto cleanup;
+ }
+
+ usleep(race_msec() * 1000);
+
+cleanup:
+ race_stop = true;
+ for (nr_threads = i, i = 0; i < nr_threads; i++)
+ pthread_join(threads[i], NULL);
+
+ ASSERT_FALSE(USDT_SEMA_IS_ACTIVE(race), "race_semaphore");
}
+
+#ifndef __NR_uprobe
+#define __NR_uprobe 336
#endif
-void test_uprobe_syscall(void)
+static void test_uprobe_error(void)
+{
+ long err = syscall(__NR_uprobe);
+
+ ASSERT_EQ(err, -1, "error");
+ ASSERT_EQ(errno, ENXIO, "errno");
+}
+
+static void __test_uprobe_syscall(void)
{
if (test__start_subtest("uretprobe_regs_equal"))
- test_uretprobe_regs_equal();
- if (test__start_subtest("uretprobe_regs_change"))
- test_uretprobe_regs_change();
+ test_uprobe_regs_equal(true);
if (test__start_subtest("uretprobe_syscall_call"))
test_uretprobe_syscall_call();
if (test__start_subtest("uretprobe_shadow_stack"))
test_uretprobe_shadow_stack();
+ if (test__start_subtest("uprobe_legacy"))
+ test_uprobe_legacy();
+ if (test__start_subtest("uprobe_multi"))
+ test_uprobe_multi();
+ if (test__start_subtest("uprobe_session"))
+ test_uprobe_session();
+ if (test__start_subtest("uprobe_usdt"))
+ test_uprobe_usdt();
+ if (test__start_subtest("uprobe_race"))
+ test_uprobe_race();
+ if (test__start_subtest("uprobe_error"))
+ test_uprobe_error();
+ if (test__start_subtest("uprobe_regs_equal"))
+ test_uprobe_regs_equal(false);
+ if (test__start_subtest("regs_change"))
+ test_regs_change();
+}
+#else
+static void __test_uprobe_syscall(void)
+{
+ test__skip();
+}
+#endif
+
+void test_uprobe_syscall(void)
+{
+ __test_uprobe_syscall();
}
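Note that race_msec() above makes the duration of the uprobe_race stress loop tunable:
exporting BPF_SELFTESTS_UPROBE_SYSCALL_RACE_MSEC (in milliseconds) before running the
uprobe_syscall/uprobe_race subtest overrides the 500 ms default.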
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
index 56ed1eb9b527..f4be5269fa90 100644
--- a/tools/testing/selftests/bpf/prog_tests/usdt.c
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -40,12 +40,79 @@ static void __always_inline trigger_func(int x) {
}
}
-static void subtest_basic_usdt(void)
+#if defined(__x86_64__) || defined(__i386__)
+/*
+ * SIB (Scale-Index-Base) addressing format: "size@(base_reg, index_reg, scale)"
+ * - 'size' is the size in bytes of the array element, and its sign indicates
+ * whether the type is signed (negative) or unsigned (positive).
+ * - 'base_reg' is the register holding the base address, normally rdx or edx
+ * - 'index_reg' is the register holding the index, normally rax or eax
+ * - 'scale' is the scaling factor (typically 1, 2, 4, or 8), which matches the
+ * size of the element type.
+ *
+ * For example, for an array of 'short' (signed 2-byte elements), the SIB spec would be:
+ * - size: -2 (negative because 'short' is signed)
+ * - scale: 2 (since sizeof(short) == 2)
+ *
+ * The resulting SIB format: "-2@(%%rdx,%%rax,2)" for x86_64, "-2@(%%edx,%%eax,2)" for i386
+ */
+static volatile short array[] = {-1, -2, -3, -4};
+
+#if defined(__x86_64__)
+#define USDT_SIB_ARG_SPEC -2@(%%rdx,%%rax,2)
+#else
+#define USDT_SIB_ARG_SPEC -2@(%%edx,%%eax,2)
+#endif
+
+unsigned short test_usdt_sib_semaphore SEC(".probes");
+
+static void trigger_sib_spec(void)
+{
+ /*
+ * Force SIB addressing with inline assembly.
+ *
+ * You must compile with -std=gnu99 or -std=c99 to use the
+ * STAP_PROBE_ASM macro.
+ *
+ * The STAP_PROBE_ASM macro generates a quoted string that gets
+ * inserted between the surrounding assembly instructions. In this
+ * case, USDT_SIB_ARG_SPEC is embedded directly into the instruction
+ * stream, creating a probe point between the asm statement boundaries.
+ * It works fine with gcc/clang.
+ *
+ * Register constraints:
+ * - "d"(array): Binds the 'array' variable to %rdx or %edx register
+ * - "a"(0): Binds the constant 0 to %rax or %eax register
+ * These ensure that when USDT_SIB_ARG_SPEC references %%rdx(%edx) and
+ * %%rax(%eax), they contain the expected values for SIB addressing.
+ *
+ * The "memory" clobber prevents the compiler from reordering memory
+ * accesses around the probe point, ensuring that the probe behavior
+ * is predictable and consistent.
+ */
+ asm volatile(
+ STAP_PROBE_ASM(test, usdt_sib, USDT_SIB_ARG_SPEC)
+ :
+ : "d"(array), "a"(0)
+ : "memory"
+ );
+}
+#endif
+
+static void subtest_basic_usdt(bool optimized)
{
LIBBPF_OPTS(bpf_usdt_opts, opts);
struct test_usdt *skel;
struct test_usdt__bss *bss;
- int err;
+ int err, i, called;
+ const __u64 expected_cookie = 0xcafedeadbeeffeed;
+
+#define TRIGGER(x) ({ \
+ trigger_func(x); \
+ if (optimized) \
+ trigger_func(x); \
+ optimized ? 2 : 1; \
+ })
skel = test_usdt__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel_open"))
@@ -59,22 +126,32 @@ static void subtest_basic_usdt(void)
goto cleanup;
/* usdt0 won't be auto-attached */
- opts.usdt_cookie = 0xcafedeadbeeffeed;
+ opts.usdt_cookie = expected_cookie;
skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
0 /*self*/, "/proc/self/exe",
"test", "usdt0", &opts);
if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
goto cleanup;
- trigger_func(1);
+#if defined(__x86_64__) || defined(__i386__)
+ opts.usdt_cookie = expected_cookie;
+ skel->links.usdt_sib = bpf_program__attach_usdt(skel->progs.usdt_sib,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt_sib", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt_sib, "usdt_sib_link"))
+ goto cleanup;
+#endif
+
+ called = TRIGGER(1);
- ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
- ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
- ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
- ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_cookie, expected_cookie, "usdt0_cookie");
ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+ ASSERT_EQ(bss->usdt0_arg_size, -ENOENT, "usdt0_arg_size");
/* auto-attached usdt3 gets default zero cookie value */
ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
@@ -86,6 +163,9 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+ ASSERT_EQ(bss->usdt3_arg_sizes[0], 4, "usdt3_arg1_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[1], 8, "usdt3_arg2_size");
+ ASSERT_EQ(bss->usdt3_arg_sizes[2], 8, "usdt3_arg3_size");
/* auto-attached usdt12 gets default zero cookie value */
ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
@@ -104,17 +184,22 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+ int usdt12_expected_arg_sizes[12] = { 4, 4, 8, 8, 4, 8, 8, 8, 4, 2, 2, 1 };
+
+ for (i = 0; i < 12; i++)
+ ASSERT_EQ(bss->usdt12_arg_sizes[i], usdt12_expected_arg_sizes[i], "usdt12_arg_size");
+
/* trigger_func() is marked __always_inline, so USDT invocations will be
* inlined in two different places, meaning that each USDT will have
* at least 2 different places to be attached to. This verifies that
* bpf_program__attach_usdt() handles this properly and attaches to
* all possible places of USDT invocation.
*/
- trigger_func(2);
+ called += TRIGGER(2);
- ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
- ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
- ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+ ASSERT_EQ(bss->usdt0_called, called, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, called, "usdt12_called");
/* only check values that depend on trigger_func()'s input value */
ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
@@ -133,9 +218,9 @@ static void subtest_basic_usdt(void)
if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
goto cleanup;
- trigger_func(3);
+ called += TRIGGER(3);
- ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+ ASSERT_EQ(bss->usdt3_called, called, "usdt3_called");
/* this time usdt3 has custom cookie */
ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
@@ -147,8 +232,19 @@ static void subtest_basic_usdt(void)
ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+#if defined(__x86_64__) || defined(__i386__)
+ trigger_sib_spec();
+ ASSERT_EQ(bss->usdt_sib_called, 1, "usdt_sib_called");
+ ASSERT_EQ(bss->usdt_sib_cookie, expected_cookie, "usdt_sib_cookie");
+ ASSERT_EQ(bss->usdt_sib_arg_cnt, 1, "usdt_sib_arg_cnt");
+ ASSERT_EQ(bss->usdt_sib_arg, nums[0], "usdt_sib_arg");
+ ASSERT_EQ(bss->usdt_sib_arg_ret, 0, "usdt_sib_arg_ret");
+ ASSERT_EQ(bss->usdt_sib_arg_size, sizeof(nums[0]), "usdt_sib_arg_size");
+#endif
+
cleanup:
test_usdt__destroy(skel);
+#undef TRIGGER
}
unsigned short test_usdt_100_semaphore SEC(".probes");
@@ -261,8 +357,16 @@ static void subtest_multispec_usdt(void)
*/
trigger_300_usdts();
- /* we'll reuse usdt_100 BPF program for usdt_300 test */
bpf_link__destroy(skel->links.usdt_100);
+
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+	/* If built for arm64 with clang, there are far fewer specs for the
+	 * usdt_300 call sites.
+ */
+#if !defined(__aarch64__) || !defined(__clang__)
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
"test", "usdt_300", NULL);
err = -errno;
@@ -273,13 +377,11 @@ static void subtest_multispec_usdt(void)
/* let's check that there are no "dangling" BPF programs attached due
* to partial success of the above test:usdt_300 attachment
*/
- bss->usdt_100_called = 0;
- bss->usdt_100_sum = 0;
-
f300(777); /* this is 301st instance of usdt_300 */
ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+#endif
/* This time we have USDT with 400 inlined invocations, but arg specs
* should be the same across all sites, so libbpf will only need to
@@ -410,7 +512,11 @@ cleanup:
void test_usdt(void)
{
if (test__start_subtest("basic"))
- subtest_basic_usdt();
+ subtest_basic_usdt(false);
+#ifdef __x86_64__
+ if (test__start_subtest("basic_optimized"))
+ subtest_basic_usdt(true);
+#endif
if (test__start_subtest("multispec"))
subtest_multispec_usdt();
if (test__start_subtest("urand_auto_attach"))
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
index d424e7ecbd12..9fd3ae987321 100644
--- a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -21,8 +21,7 @@
#include "../progs/test_user_ringbuf.h"
static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
-static const long c_ringbuf_size = 1 << 12; /* 1 small page */
-static const long c_max_entries = c_ringbuf_size / c_sample_size;
+static long c_ringbuf_size, c_max_entries;
static void drain_current_samples(void)
{
@@ -424,7 +423,9 @@ static void test_user_ringbuf_loop(void)
uint32_t remaining_samples = total_samples;
int err;
- BUILD_BUG_ON(total_samples <= c_max_entries);
+ if (!ASSERT_LT(c_max_entries, total_samples, "compare_c_max_entries"))
+ return;
+
err = load_skel_create_user_ringbuf(&skel, &ringbuf);
if (err)
return;
@@ -686,6 +687,9 @@ void test_user_ringbuf(void)
{
int i;
+ c_ringbuf_size = getpagesize(); /* 1 page */
+ c_max_entries = c_ringbuf_size / c_sample_size;
+
for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
if (!test__start_subtest(success_tests[i].test_name))
continue;
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index d9f65adb456b..4b4b081b46cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -7,6 +7,7 @@
#include "verifier_arena.skel.h"
#include "verifier_arena_large.skel.h"
#include "verifier_array_access.skel.h"
+#include "verifier_async_cb_context.skel.h"
#include "verifier_basic_stack.skel.h"
#include "verifier_bitfield_write.skel.h"
#include "verifier_bounds.skel.h"
@@ -14,6 +15,7 @@
#include "verifier_bounds_deduction_non_const.skel.h"
#include "verifier_bounds_mix_sign_unsign.skel.h"
#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bpf_trap.skel.h"
#include "verifier_bswap.skel.h"
#include "verifier_btf_ctx_access.skel.h"
#include "verifier_btf_unreliable_prog.skel.h"
@@ -33,6 +35,7 @@
#include "verifier_global_subprogs.skel.h"
#include "verifier_global_ptr_args.skel.h"
#include "verifier_gotol.skel.h"
+#include "verifier_gotox.skel.h"
#include "verifier_helper_access_var_len.skel.h"
#include "verifier_helper_packet_access.skel.h"
#include "verifier_helper_restricted.skel.h"
@@ -45,6 +48,8 @@
#include "verifier_ldsx.skel.h"
#include "verifier_leak_ptr.skel.h"
#include "verifier_linked_scalars.skel.h"
+#include "verifier_live_stack.skel.h"
+#include "verifier_load_acquire.skel.h"
#include "verifier_loops1.skel.h"
#include "verifier_lwt.skel.h"
#include "verifier_map_in_map.skel.h"
@@ -52,9 +57,12 @@
#include "verifier_map_ptr_mixing.skel.h"
#include "verifier_map_ret_val.skel.h"
#include "verifier_masking.skel.h"
+#include "verifier_may_goto_1.skel.h"
+#include "verifier_may_goto_2.skel.h"
#include "verifier_meta_access.skel.h"
#include "verifier_movsx.skel.h"
#include "verifier_mtu.skel.h"
+#include "verifier_mul.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
#include "verifier_bpf_fastcall.skel.h"
@@ -78,8 +86,10 @@
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
+#include "verifier_store_release.skel.h"
#include "verifier_subprog_precision.skel.h"
#include "verifier_subreg.skel.h"
+#include "verifier_tailcall.skel.h"
#include "verifier_tailcall_jit.skel.h"
#include "verifier_typedef.skel.h"
#include "verifier_uninit.skel.h"
@@ -98,6 +108,7 @@
#include "verifier_xdp_direct_packet_access.skel.h"
#include "verifier_bits_iter.skel.h"
#include "verifier_lsm.skel.h"
+#include "irq.skel.h"
#define MAX_ENTRIES 11
@@ -118,7 +129,7 @@ static void run_tests_aux(const char *skel_name,
/* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
if (err) {
- PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
return;
}
@@ -128,7 +139,7 @@ static void run_tests_aux(const char *skel_name,
err = cap_enable_effective(old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(-err));
}
#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
@@ -143,6 +154,7 @@ void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction);
void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bpf_trap(void) { RUN(verifier_bpf_trap); }
void test_verifier_bswap(void) { RUN(verifier_bswap); }
void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
@@ -162,6 +174,7 @@ void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
void test_verifier_gotol(void) { RUN(verifier_gotol); }
+void test_verifier_gotox(void) { RUN(verifier_gotox); }
void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
@@ -170,10 +183,12 @@ void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
void test_verifier_jit_convergence(void) { RUN(verifier_jit_convergence); }
+void test_verifier_load_acquire(void) { RUN(verifier_load_acquire); }
void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
void test_verifier_linked_scalars(void) { RUN(verifier_linked_scalars); }
+void test_verifier_live_stack(void) { RUN(verifier_live_stack); }
void test_verifier_loops1(void) { RUN(verifier_loops1); }
void test_verifier_lwt(void) { RUN(verifier_lwt); }
void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
@@ -181,8 +196,11 @@ void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); }
void test_verifier_map_ptr_mixing(void) { RUN(verifier_map_ptr_mixing); }
void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); }
void test_verifier_masking(void) { RUN(verifier_masking); }
+void test_verifier_may_goto_1(void) { RUN(verifier_may_goto_1); }
+void test_verifier_may_goto_2(void) { RUN(verifier_may_goto_2); }
void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
+void test_verifier_mul(void) { RUN(verifier_mul); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
void test_verifier_bpf_fastcall(void) { RUN(verifier_bpf_fastcall); }
@@ -206,8 +224,10 @@ void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_store_release(void) { RUN(verifier_store_release); }
void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
void test_verifier_subreg(void) { RUN(verifier_subreg); }
+void test_verifier_tailcall(void) { RUN(verifier_tailcall); }
void test_verifier_tailcall_jit(void) { RUN(verifier_tailcall_jit); }
void test_verifier_typedef(void) { RUN(verifier_typedef); }
void test_verifier_uninit(void) { RUN(verifier_uninit); }
@@ -225,24 +245,8 @@ void test_verifier_xdp(void) { RUN(verifier_xdp); }
void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
void test_verifier_lsm(void) { RUN(verifier_lsm); }
-
-void test_verifier_mtu(void)
-{
- __u64 caps = 0;
- int ret;
-
- /* In case CAP_BPF and CAP_PERFMON is not set */
- ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps);
- if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin"))
- return;
- ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL);
- if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
- goto restore_cap;
- RUN(verifier_mtu);
-restore_cap:
- if (caps)
- cap_enable_effective(caps, NULL);
-}
+void test_irq(void) { RUN(irq); }
+void test_verifier_mtu(void) { RUN(verifier_mtu); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
@@ -279,6 +283,7 @@ void test_verifier_array_access(void)
verifier_array_access__elf_bytes,
init_array_access_maps);
}
+void test_verifier_async_cb_context(void) { RUN(verifier_async_cb_context); }
static int init_value_ptr_arith_maps(struct bpf_object *obj)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
index ab0f02faa80c..4d69d9d55e17 100644
--- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -268,7 +268,7 @@ static void test_verify_pkcs7_sig_from_map(void)
char *tmp_dir;
struct test_verify_pkcs7_sig *skel = NULL;
struct bpf_map *map;
- struct data data;
+ struct data data = {};
int ret, zero = 0;
/* Trigger creation of session keyring. */
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
index 99e438fe12ac..15c67d23128b 100644
--- a/tools/testing/selftests/bpf/prog_tests/wq.c
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -38,3 +38,59 @@ void serial_test_failures_wq(void)
{
RUN_TESTS(wq_failures);
}
+
+static void test_failure_map_no_btf(void)
+{
+ struct wq *skel = NULL;
+ char log[8192];
+ const struct bpf_insn *insns;
+ size_t insn_cnt;
+ int ret, err, map_fd;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts, .log_size = sizeof(log), .log_buf = log,
+ .log_level = 2);
+
+ skel = wq__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = bpf_object__prepare(skel->obj);
+ if (!ASSERT_OK(err, "skel__prepare"))
+ goto out;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "map_no_btf", sizeof(__u32), sizeof(__u64), 100,
+ NULL);
+ if (!ASSERT_GT(map_fd, -1, "map create"))
+ goto out;
+
+ err = bpf_map__reuse_fd(skel->maps.array, map_fd);
+ if (!ASSERT_OK(err, "map reuse fd")) {
+ close(map_fd);
+ goto out;
+ }
+
+ insns = bpf_program__insns(skel->progs.test_map_no_btf);
+ if (!ASSERT_OK_PTR(insns, "insns ptr"))
+ goto out;
+
+ insn_cnt = bpf_program__insn_cnt(skel->progs.test_map_no_btf);
+ if (!ASSERT_GT(insn_cnt, 0u, "insn cnt"))
+ goto out;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts);
+ if (!ASSERT_LT(ret, 0, "prog load failed")) {
+ if (ret > 0)
+ close(ret);
+ goto out;
+ }
+
+ ASSERT_HAS_SUBSTR(log, "map 'map_no_btf' has to have BTF in order to use bpf_wq",
+ "log complains no map BTF");
+out:
+ wq__destroy(skel);
+}
+
+void test_wq_custom(void)
+{
+ if (test__start_subtest("test_failure_map_no_btf"))
+ test_failure_map_no_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 53d6ad8c2257..43264347e7d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -37,21 +37,26 @@ static void test_xdp_adjust_tail_shrink(void)
bpf_object__close(obj);
}
-static void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(bool is_64k_pagesize)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
struct bpf_object *obj;
- char buf[4096]; /* avoid segfault: large buf to hold grow results */
+ char buf[8192]; /* avoid segfault: large buf to hold grow results */
__u32 expect_sz;
int err, prog_fd;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
.data_out = buf,
.data_size_out = sizeof(buf),
.repeat = 1,
);
+	/* Use topts.data_size_in as a special signal to the BPF program */
+ if (is_64k_pagesize)
+ topts.data_size_in = sizeof(pkt_v4) - 1;
+ else
+ topts.data_size_in = sizeof(pkt_v4);
+
err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
@@ -82,6 +87,8 @@ static void test_xdp_adjust_tail_grow2(void)
/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
#if defined(__s390x__)
int tailroom = 512;
+#elif defined(__powerpc__)
+ int tailroom = 384;
#else
int tailroom = 320;
#endif
@@ -206,7 +213,7 @@ out:
bpf_object__close(obj);
}
-static void test_xdp_adjust_frags_tail_grow(void)
+static void test_xdp_adjust_frags_tail_grow_4k(void)
{
const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
__u32 exp_size;
@@ -244,14 +251,20 @@ static void test_xdp_adjust_frags_tail_grow(void)
ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
- for (i = 0; i < 9000; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ for (i = 0; i < 9000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+ }
- for (i = 9000; i < 9010; i++)
- ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ for (i = 9000; i < 9010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+ }
- for (i = 9010; i < 16384; i++)
- ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ for (i = 9010; i < 16384; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+ }
/* Test a too large grow */
memset(buf, 1, 16384);
@@ -271,16 +284,93 @@ out:
bpf_object__close(obj);
}
+static void test_xdp_adjust_frags_tail_grow_64k(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(262144);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 256Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 262144);
+ exp_size = 90000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 90000;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ for (i = 0; i < 90000; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-old");
+ }
+
+ for (i = 90000; i < 90010; i++) {
+ if (buf[i] != 0)
+ ASSERT_EQ(buf[i], 0, "90Kb+10b-new");
+ }
+
+ for (i = 90010; i < 262144; i++) {
+ if (buf[i] != 1)
+ ASSERT_EQ(buf[i], 1, "90Kb+10b-untouched");
+ }
+
+ /* Test a too large grow */
+ memset(buf, 1, 262144);
+ exp_size = 90001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 90001;
+ topts.data_size_out = 262144;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "90Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "90Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "90Kb+10b size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
void test_xdp_adjust_tail(void)
{
+ int page_size = getpagesize();
+
if (test__start_subtest("xdp_adjust_tail_shrink"))
test_xdp_adjust_tail_shrink();
if (test__start_subtest("xdp_adjust_tail_grow"))
- test_xdp_adjust_tail_grow();
+ test_xdp_adjust_tail_grow(page_size == 65536);
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
test_xdp_adjust_frags_tail_shrink();
- if (test__start_subtest("xdp_adjust_frags_tail_grow"))
- test_xdp_adjust_frags_tail_grow();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow")) {
+ if (page_size == 65536)
+ test_xdp_adjust_frags_tail_grow_64k();
+ else
+ test_xdp_adjust_frags_tail_grow_4k();
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index 6d8b54124cb3..fb952703653e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -17,7 +17,7 @@
#include "network_helpers.h"
#include <linux/if_bonding.h>
#include <linux/limits.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <uapi/linux/netdev.h>
#include "xdp_dummy.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index e6a783c7f5db..ee94c281888a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -2,6 +2,23 @@
#include <test_progs.h>
#include <network_helpers.h>
#include "test_xdp_context_test_run.skel.h"
+#include "test_xdp_meta.skel.h"
+
+#define RX_NAME "veth0"
+#define TX_NAME "veth1"
+#define TX_NETNS "xdp_context_tx"
+#define RX_NETNS "xdp_context_rx"
+#define TAP_NAME "tap0"
+#define DUMMY_NAME "dum0"
+#define TAP_NETNS "xdp_context_tuntap"
+
+#define TEST_PAYLOAD_LEN 32
+static const __u8 test_payload[TEST_PAYLOAD_LEN] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
__u32 data_meta, __u32 data, __u32 data_end,
@@ -80,9 +97,7 @@ void test_xdp_context_test_run(void)
/* Meta data must be 255 bytes or smaller */
test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
- /* Total size of data must match data_end - data_meta */
- test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
- sizeof(data) - 1, 0, 0, 0);
+ /* Total size of data must be data_end - data_meta or larger */
test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
sizeof(data) + 1, 0, 0, 0);
@@ -103,3 +118,395 @@ void test_xdp_context_test_run(void)
test_xdp_context_test_run__destroy(skel);
}
+
+static int send_test_packet(int ifindex)
+{
+ int n, sock = -1;
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+
+ /* We use the Ethernet header only to identify the test packet */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN);
+
+ sock = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(sock, 0, "socket"))
+ goto err;
+
+ struct sockaddr_ll saddr = {
+ .sll_family = PF_PACKET,
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN
+ };
+ n = sendto(sock, packet, sizeof(packet), 0, (struct sockaddr *)&saddr,
+ sizeof(saddr));
+ if (!ASSERT_EQ(n, sizeof(packet), "sendto"))
+ goto err;
+
+ close(sock);
+ return 0;
+
+err:
+ if (sock >= 0)
+ close(sock);
+ return -1;
+}
+
+static int write_test_packet(int tap_fd)
+{
+ __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN];
+ int n;
+
+	/* The Ethernet header is mostly irrelevant: we use it to identify the
+	 * test packet, and some of the BPF helpers we exercise expect to operate
+	 * on Ethernet frames carrying IP packets, so pretend that is the case.
+ */
+ struct ethhdr eth = {
+ .h_source = { 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF },
+ .h_proto = htons(ETH_P_IP),
+ };
+
+ memcpy(packet, &eth, sizeof(eth));
+ memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN);
+
+ n = write(tap_fd, packet, sizeof(packet));
+ if (!ASSERT_EQ(n, sizeof(packet), "write packet"))
+ return -1;
+
+ return 0;
+}
+
+static void dump_err_stream(const struct bpf_program *prog)
+{
+ char buf[512];
+ int ret;
+
+ ret = 0;
+ do {
+ ret = bpf_prog_stream_read(bpf_program__fd(prog),
+ BPF_STREAM_STDERR, buf, sizeof(buf),
+ NULL);
+ if (ret > 0)
+ fwrite(buf, sizeof(buf[0]), ret, stderr);
+ } while (ret > 0);
+}
+
+void test_xdp_context_veth(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *rx_ns = NULL, *tx_ns = NULL;
+ struct bpf_program *tc_prog, *xdp_prog;
+ struct test_xdp_meta *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int rx_ifindex, tx_ifindex;
+ int ret;
+
+ tx_ns = netns_new(TX_NETNS, false);
+ if (!ASSERT_OK_PTR(tx_ns, "create tx_ns"))
+ return;
+
+ rx_ns = netns_new(RX_NETNS, false);
+ if (!ASSERT_OK_PTR(rx_ns, "create rx_ns"))
+ goto close;
+
+ SYS(close, "ip link add " RX_NAME " netns " RX_NETNS
+ " type veth peer name " TX_NAME " netns " TX_NETNS);
+
+ nstoken = open_netns(RX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
+ goto close;
+
+ SYS(close, "ip link set dev " RX_NAME " up");
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ rx_ifindex = if_nametoindex(RX_NAME);
+ if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx"))
+ goto close;
+
+ tc_hook.ifindex = rx_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls");
+ if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp");
+ if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog"))
+ goto close;
+
+ ret = bpf_xdp_attach(rx_ifindex,
+ bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(TX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
+ goto close;
+
+ SYS(close, "ip link set dev " TX_NAME " up");
+
+ tx_ifindex = if_nametoindex(TX_NAME);
+ if (!ASSERT_GE(tx_ifindex, 0, "if_nametoindex tx"))
+ goto close;
+
+ skel->bss->test_pass = false;
+
+ ret = send_test_packet(tx_ifindex);
+ if (!ASSERT_OK(ret, "send_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(skel->bss->test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
+
+close:
+ close_netns(nstoken);
+ test_xdp_meta__destroy(skel);
+ netns_free(rx_ns);
+ netns_free(tx_ns);
+}
+
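+/* Attach an XDP program plus one or two TC ingress programs (priority 1 and,
+ * optionally, priority 2) to a TAP device, write a test packet into the TAP
+ * fd and expect the BPF side to set *test_pass.
+ */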
+static void test_tuntap(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prio_1_prog,
+ struct bpf_program *tc_prio_2_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "create and open ns"))
+ return;
+
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = tap_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ if (tc_prio_2_prog) {
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2,
+ .prog_fd = bpf_program__fd(tc_prio_2_prog));
+
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+ }
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prio_2_prog ? : tc_prio_1_prog);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+/* Write a packet to a tap dev and copy it to ingress of a dummy dev */
+static void test_tuntap_mirred(struct bpf_program *xdp_prog,
+ struct bpf_program *tc_prog,
+ bool *test_pass)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *ns = NULL;
+ int dummy_ifindex;
+ int tap_fd = -1;
+ int tap_ifindex;
+ int ret;
+
+ *test_pass = false;
+
+ ns = netns_new(TAP_NETNS, true);
+ if (!ASSERT_OK_PTR(ns, "netns_new"))
+ return;
+
+ /* Setup dummy interface */
+ SYS(close, "ip link add name " DUMMY_NAME " type dummy");
+ SYS(close, "ip link set dev " DUMMY_NAME " up");
+
+ dummy_ifindex = if_nametoindex(DUMMY_NAME);
+ if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ tc_hook.ifindex = dummy_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ /* Setup TAP interface */
+ tap_fd = open_tuntap(TAP_NAME, true);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto close;
+
+ SYS(close, "ip link set dev " TAP_NAME " up");
+
+ tap_ifindex = if_nametoindex(TAP_NAME);
+ if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex"))
+ goto close;
+
+ ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ /* Copy all packets received from TAP to dummy ingress */
+ SYS(close, "tc qdisc add dev " TAP_NAME " clsact");
+ SYS(close, "tc filter add dev " TAP_NAME " ingress "
+ "protocol all matchall "
+ "action mirred ingress mirror dev " DUMMY_NAME);
+
+ /* Receive a packet on TAP */
+ ret = write_test_packet(tap_fd);
+ if (!ASSERT_OK(ret, "write_test_packet"))
+ goto close;
+
+ if (!ASSERT_TRUE(*test_pass, "test_pass"))
+ dump_err_stream(tc_prog);
+
+close:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ netns_free(ns);
+}
+
+void test_xdp_context_tuntap(void)
+{
+ struct test_xdp_meta *skel = NULL;
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ return;
+
+ if (test__start_subtest("data_meta"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_read"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_read,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_slice"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_slice,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_write"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_write,
+ skel->progs.ing_cls_dynptr_read,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_slice_rdwr"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_slice_rdwr,
+ skel->progs.ing_cls_dynptr_slice,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_offset"))
+ test_tuntap(skel->progs.ing_xdp_zalloc_meta,
+ skel->progs.ing_cls_dynptr_offset_wr,
+ skel->progs.ing_cls_dynptr_offset_rd,
+ &skel->bss->test_pass);
+ if (test__start_subtest("dynptr_offset_oob"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.ing_cls_dynptr_offset_oob,
+ skel->progs.ing_cls,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_data_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_survives_data_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_data_meta_survives_meta_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_data_meta_survives_meta_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_survives_data_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_survives_data_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_survives_meta_slice_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_survives_meta_slice_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_rw_before_data_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_rw_before_data_dynptr_write,
+ &skel->bss->test_pass);
+ if (test__start_subtest("clone_meta_dynptr_rw_before_meta_dynptr_write"))
+ test_tuntap_mirred(skel->progs.ing_xdp,
+ skel->progs.clone_meta_dynptr_rw_before_meta_dynptr_write,
+ &skel->bss->test_pass);
+ /* Tests for BPF helpers which touch headroom */
+ if (test__start_subtest("helper_skb_vlan_push_pop"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_vlan_push_pop,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_adjust_room"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_adjust_room,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_head_tail"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_head_tail,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+ if (test__start_subtest("helper_skb_change_proto"))
+ test_tuntap(skel->progs.ing_xdp,
+ skel->progs.helper_skb_change_proto,
+ NULL, /* tc prio 2 */
+ &skel->bss->test_pass);
+
+ test_xdp_meta__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index c7f74f068e78..df27535995af 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -52,10 +52,10 @@ static void test_xdp_with_cpumap_helpers(void)
ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
/* send a packet to trigger any potential bugs in there */
- char data[10] = {};
+ char data[ETH_HLEN] = {};
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 27ffed17d4be..a8ab05216c38 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -7,6 +7,7 @@
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_devmap_tailcall.skel.h"
#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
@@ -23,7 +24,7 @@ static void test_xdp_with_devmap_helpers(void)
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd;
struct nstoken *nstoken = NULL;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -58,7 +59,7 @@ static void test_xdp_with_devmap_helpers(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -107,6 +108,29 @@ static void test_neg_xdp_devmap_helpers(void)
}
}
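+/* Load the tail-call test skeleton with the given expected attach types on
+ * the devmap and entry programs and check whether the verifier accepts or
+ * rejects the combination.
+ */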
+static void test_xdp_devmap_tailcall(enum bpf_attach_type prog_dev,
+ enum bpf_attach_type prog_tail,
+ bool expect_reject)
+{
+ struct test_xdp_devmap_tailcall *skel;
+ int err;
+
+ skel = test_xdp_devmap_tailcall__open();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_devmap_tailcall__open"))
+ return;
+
+ bpf_program__set_expected_attach_type(skel->progs.xdp_devmap, prog_dev);
+ bpf_program__set_expected_attach_type(skel->progs.xdp_entry, prog_tail);
+
+ err = test_xdp_devmap_tailcall__load(skel);
+ if (expect_reject)
+ ASSERT_ERR(err, "test_xdp_devmap_tailcall__load");
+ else
+ ASSERT_OK(err, "test_xdp_devmap_tailcall__load");
+
+ test_xdp_devmap_tailcall__destroy(skel);
+}
+
static void test_xdp_with_devmap_frags_helpers(void)
{
struct test_xdp_with_devmap_frags_helpers *skel;
@@ -158,7 +182,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
struct nstoken *nstoken = NULL;
__u32 len = sizeof(info);
int err, dm_fd, dm_fd_redir, map_fd, ifindex_dst;
- char data[10] = {};
+ char data[ETH_HLEN] = {};
__u32 idx = 0;
SYS(out_close, "ip netns add %s", TEST_NS);
@@ -208,7 +232,7 @@ static void test_xdp_with_devmap_helpers_veth(void)
/* send a packet to trigger any potential bugs in there */
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = 10,
+ .data_size_in = sizeof(data),
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
@@ -238,8 +262,13 @@ void serial_test_xdp_devmap_attach(void)
if (test__start_subtest("DEVMAP with frags programs in entries"))
test_xdp_with_devmap_frags_helpers();
- if (test__start_subtest("Verifier check of DEVMAP programs"))
+ if (test__start_subtest("Verifier check of DEVMAP programs")) {
test_neg_xdp_devmap_helpers();
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, BPF_XDP_DEVMAP, false);
+ test_xdp_devmap_tailcall(0, 0, true);
+ test_xdp_devmap_tailcall(BPF_XDP_DEVMAP, 0, true);
+ test_xdp_devmap_tailcall(0, BPF_XDP_DEVMAP, true);
+ }
if (test__start_subtest("DEVMAP with programs in entries on veth"))
test_xdp_with_devmap_helpers_veth();
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index bad0ea167be7..dd34b0cc4b4e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -7,10 +7,11 @@
#include <linux/if_link.h>
#include <linux/ipv6.h>
#include <linux/in6.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <bpf/bpf_endian.h>
#include <uapi/linux/netdev.h>
#include "test_xdp_do_redirect.skel.h"
+#include "xdp_dummy.skel.h"
struct udp_packet {
struct ethhdr eth;
@@ -65,16 +66,25 @@ static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
#else
#define MAX_PKT_SIZE 3408
#endif
+
+#define PAGE_SIZE_4K 4096
+#define PAGE_SIZE_64K 65536
+
static void test_max_pkt_size(int fd)
{
- char data[MAX_PKT_SIZE + 1] = {};
+ char data[PAGE_SIZE_64K + 1] = {};
int err;
DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
.data_in = &data,
- .data_size_in = MAX_PKT_SIZE,
.flags = BPF_F_TEST_XDP_LIVE_FRAMES,
.repeat = 1,
);
+
+ if (getpagesize() == PAGE_SIZE_64K)
+ opts.data_size_in = MAX_PKT_SIZE + PAGE_SIZE_64K - PAGE_SIZE_4K;
+ else
+ opts.data_size_in = MAX_PKT_SIZE;
+
err = bpf_prog_test_run_opts(fd, &opts);
ASSERT_OK(err, "prog_run_max_size");
@@ -246,3 +256,166 @@ out:
SYS_NOFAIL("ip netns del testns");
test_xdp_do_redirect__destroy(skel);
}
+
+#define NS_NB 3
+#define NS0 "NS0"
+#define NS1 "NS1"
+#define NS2 "NS2"
+#define IPV4_NETWORK "10.1.1"
+#define VETH1_INDEX 111
+#define VETH2_INDEX 222
+
+struct test_data {
+ struct netns_obj *ns[NS_NB];
+ u32 xdp_flags;
+};
+
+static void cleanup(struct test_data *data)
+{
+ int i;
+
+ for (i = 0; i < NS_NB; i++)
+ netns_free(data->ns[i]);
+}
+
+/**
+ * ping_setup -
+ * Create two veth peers and forward packets in-between using XDP
+ *
+ * ------------ ------------
+ * | NS1 | | NS2 |
+ * | veth0 | | veth0 |
+ * | 10.1.1.1 | | 10.1.1.2 |
+ * -----|------ ------|-----
+ * | |
+ * | |
+ * -----|-----------------------|-------
+ * | veth1 veth2 |
+ * | (id:111) (id:222) |
+ * | | | |
+ * | ----- xdp forwarding ----- |
+ * | |
+ * | NS0 |
+ * -------------------------------------
+ */
+static int ping_setup(struct test_data *data)
+{
+ int i;
+
+ data->ns[0] = netns_new(NS0, false);
+ if (!ASSERT_OK_PTR(data->ns[0], "create ns"))
+ return -1;
+
+ for (i = 1; i < NS_NB; i++) {
+ char ns_name[4] = {};
+
+ snprintf(ns_name, 4, "NS%d", i);
+ data->ns[i] = netns_new(ns_name, false);
+ if (!ASSERT_OK_PTR(data->ns[i], "create ns"))
+ goto fail;
+
+ SYS(fail,
+ "ip -n %s link add veth%d index %d%d%d type veth peer name veth0 netns %s",
+ NS0, i, i, i, i, ns_name);
+ SYS(fail, "ip -n %s link set veth%d up", NS0, i);
+
+ SYS(fail, "ip -n %s addr add %s.%d/24 dev veth0", ns_name, IPV4_NETWORK, i);
+ SYS(fail, "ip -n %s link set veth0 up", ns_name);
+ }
+
+ return 0;
+
+fail:
+ cleanup(data);
+ return -1;
+}
+
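+/* Attach xdp_dummy on veth0 in NS1 and NS2, attach the redirect programs on
+ * veth1/veth2 in NS0, then verify connectivity with a single ping from NS1.
+ */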
+static void ping_test(struct test_data *data)
+{
+ struct test_xdp_do_redirect *skel = NULL;
+ struct xdp_dummy *skel_dummy = NULL;
+ struct nstoken *nstoken = NULL;
+ int i, ret;
+
+ skel_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skel_dummy, "open and load xdp_dummy skeleton"))
+ goto close;
+
+ for (i = 1; i < NS_NB; i++) {
+ char ns_name[4] = {};
+
+ snprintf(ns_name, 4, "NS%d", i);
+ nstoken = open_netns(ns_name);
+ if (!ASSERT_OK_PTR(nstoken, "open ns"))
+ goto close;
+
+ ret = bpf_xdp_attach(if_nametoindex("veth0"),
+ bpf_program__fd(skel_dummy->progs.xdp_dummy_prog),
+ data->xdp_flags, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach dummy_prog"))
+ goto close;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+ }
+
+ skel = test_xdp_do_redirect__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "open NS0"))
+ goto close;
+
+ ret = bpf_xdp_attach(VETH2_INDEX,
+ bpf_program__fd(skel->progs.xdp_redirect_to_111),
+ data->xdp_flags, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ ret = bpf_xdp_attach(VETH1_INDEX,
+ bpf_program__fd(skel->progs.xdp_redirect_to_222),
+ data->xdp_flags, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto close;
+
+ SYS(close, "ping -c 1 %s.2 > /dev/null", IPV4_NETWORK);
+
+close:
+ close_netns(nstoken);
+ xdp_dummy__destroy(skel_dummy);
+ test_xdp_do_redirect__destroy(skel);
+}
+
+static void xdp_redirect_ping(u32 xdp_flags)
+{
+ struct test_data data = {};
+
+ if (ping_setup(&data) < 0)
+ return;
+
+ data.xdp_flags = xdp_flags;
+ ping_test(&data);
+ cleanup(&data);
+}
+
+void test_xdp_index_redirect(void)
+{
+ if (test__start_subtest("noflag"))
+ xdp_redirect_ping(0);
+
+ if (test__start_subtest("drvflag"))
+ xdp_redirect_ping(XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("skbflag"))
+ xdp_redirect_ping(XDP_FLAGS_SKB_MODE);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
index e1bf141d3401..3f9146d83d79 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -3,7 +3,7 @@
#include <network_helpers.h>
#include <bpf/btf.h>
#include <linux/if_link.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <net/if.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index c87ee2bf558c..19f92affc2da 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -10,7 +10,7 @@
#include <linux/errqueue.h>
#include <linux/if_link.h>
#include <linux/net_tstamp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <sys/mman.h>
#include <net/if.h>
#include <poll.h>
@@ -133,23 +133,6 @@ static void close_xsk(struct xsk *xsk)
munmap(xsk->umem_area, UMEM_SIZE);
}
-static void ip_csum(struct iphdr *iph)
-{
- __u32 sum = 0;
- __u16 *p;
- int i;
-
- iph->check = 0;
- p = (void *)iph;
- for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
- sum += p[i];
-
- while (sum >> 16)
- sum = (sum & 0xffff) + (sum >> 16);
-
- iph->check = ~sum;
-}
-
static int generate_packet(struct xsk *xsk, __u16 dst_port)
{
struct xsk_tx_metadata *meta;
@@ -192,7 +175,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port)
iph->protocol = IPPROTO_UDP;
ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
- ip_csum(iph);
+ iph->check = build_ip_csum(iph);
udph->source = htons(UDP_SOURCE_PORT);
udph->dest = htons(dst_port);
@@ -368,9 +351,10 @@ void test_xdp_metadata(void)
struct xdp_metadata2 *bpf_obj2 = NULL;
struct xdp_metadata *bpf_obj = NULL;
struct bpf_program *new_prog, *prog;
+ struct bpf_devmap_val devmap_e = {};
+ struct bpf_map *prog_arr, *devmap;
struct nstoken *tok = NULL;
__u32 queue_id = QUEUE_ID;
- struct bpf_map *prog_arr;
struct xsk tx_xsk = {};
struct xsk rx_xsk = {};
__u32 val, key = 0;
@@ -426,6 +410,13 @@ void test_xdp_metadata(void)
bpf_program__set_ifindex(prog, rx_ifindex);
bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+ /* Make sure we can load a dev-bound program that performs
+ * XDP_REDIRECT into a devmap.
+ */
+ new_prog = bpf_object__find_program_by_name(bpf_obj->obj, "redirect");
+ bpf_program__set_ifindex(new_prog, rx_ifindex);
+ bpf_program__set_flags(new_prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
goto out;
@@ -440,6 +431,18 @@ void test_xdp_metadata(void)
"update prog_arr"))
goto out;
+ /* Make sure we can't add dev-bound programs to devmaps. */
+ devmap = bpf_object__find_map_by_name(bpf_obj->obj, "dev_map");
+ if (!ASSERT_OK_PTR(devmap, "no dev_map found"))
+ goto out;
+
+ devmap_e.bpf_prog.fd = val;
+ if (!ASSERT_ERR(bpf_map__update_elem(devmap, &key, sizeof(key),
+ &devmap_e, sizeof(devmap_e),
+ BPF_ANY),
+ "update dev_map"))
+ goto out;
+
/* Attach BPF program to RX interface. */
ret = bpf_xdp_attach(rx_ifindex,
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
new file mode 100644
index 000000000000..efa350d04ec5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_pull_data.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_pull_data.skel.h"
+
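+/* Flags ORed into the pull_len argument of run_test(): PULL_MAX asks
+ * run_test() to derive the maximum pullable length from the available
+ * headroom and tailroom, and PULL_PLUS_ONE adds one byte to exceed it.
+ */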
+#define PULL_MAX (1 << 31)
+#define PULL_PLUS_ONE (1 << 30)
+
+#define XDP_PACKET_HEADROOM 256
+
+/* Find headroom and tailroom occupied by struct xdp_frame and struct
+ * skb_shared_info so that we can calculate the maximum pull lengths for
+ * test cases. They might not be the real size of the structures due to
+ * cache alignment.
+ */
+static int find_xdp_sizes(struct test_xdp_pull_data *skel, int frame_sz)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(frame_sz, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return -ENOMEM;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = frame_sz;
+ topts.data_size_out = frame_sz;
+ /* Pass a data_end larger than the linear space available to make sure
+ * bpf_prog_test_run_xdp() will fill the linear data area so that
+ * xdp_find_sizes can infer the size of struct skb_shared_info
+ */
+ ctx.data_end = frame_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_find_sizes);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+
+ free(buf);
+
+ return err;
+}
+
+/* xdp_pull_data_prog directly reads a marker 0xbb stored at buf[1024], so a
+ * caller expecting XDP_PASS should always pass a pull_len of at least 1024.
+ */
+static void run_test(struct test_xdp_pull_data *skel, int retval,
+ int frame_sz, int buff_len, int meta_len, int data_len,
+ int pull_len)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct xdp_md ctx = {};
+ int prog_fd, err;
+ __u8 *buf;
+
+ buf = calloc(buff_len, sizeof(__u8));
+ if (!ASSERT_OK_PTR(buf, "calloc buf"))
+ return;
+
+ buf[meta_len + 1023] = 0xaa;
+ buf[meta_len + 1024] = 0xbb;
+ buf[meta_len + 1025] = 0xcc;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buff_len;
+ topts.data_size_out = buff_len;
+ ctx.data = meta_len;
+ ctx.data_end = meta_len + data_len;
+ topts.ctx_in = &ctx;
+ topts.ctx_out = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ topts.ctx_size_out = sizeof(ctx);
+
+ skel->bss->data_len = data_len;
+ if (pull_len & PULL_MAX) {
+ int headroom = XDP_PACKET_HEADROOM - meta_len - skel->bss->xdpf_sz;
+ int tailroom = frame_sz - XDP_PACKET_HEADROOM -
+ data_len - skel->bss->sinfo_sz;
+
+ pull_len = pull_len & PULL_PLUS_ONE ? 1 : 0;
+ pull_len += headroom + tailroom + data_len;
+ }
+ skel->bss->pull_len = pull_len;
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_pull_data_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_EQ(topts.retval, retval, "xdp_pull_data_prog retval");
+
+ if (retval == XDP_DROP)
+ goto out;
+
+ ASSERT_EQ(ctx.data_end, meta_len + pull_len, "linear data size");
+ ASSERT_EQ(topts.data_size_out, buff_len, "linear + non-linear data size");
+ /* Make sure data around xdp->data_end was not messed up by
+ * bpf_xdp_pull_data()
+ */
+ ASSERT_EQ(buf[meta_len + 1023], 0xaa, "data[1023]");
+ ASSERT_EQ(buf[meta_len + 1024], 0xbb, "data[1024]");
+ ASSERT_EQ(buf[meta_len + 1025], 0xcc, "data[1025]");
+out:
+ free(buf);
+}
+
+static void test_xdp_pull_data_basic(void)
+{
+ u32 pg_sz, max_meta_len, max_data_len;
+ struct test_xdp_pull_data *skel;
+
+ skel = test_xdp_pull_data__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_pull_data__open_and_load"))
+ return;
+
+ pg_sz = sysconf(_SC_PAGE_SIZE);
+
+ if (find_xdp_sizes(skel, pg_sz))
+ goto out;
+
+ max_meta_len = XDP_PACKET_HEADROOM - skel->bss->xdpf_sz;
+ max_data_len = pg_sz - XDP_PACKET_HEADROOM - skel->bss->sinfo_sz;
+
+ /* linear xdp pkt, pull 0 byte */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 2048, 2048);
+
+ /* multi-buf pkt, pull results in linear xdp pkt */
+ run_test(skel, XDP_PASS, pg_sz, 2048, 0, 1024, 2048);
+
+ /* multi-buf pkt, pull 1 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1024, 1025);
+
+ /* multi-buf pkt, pull 0 byte to linear data area */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 1025, 1025);
+
+ /* multi-buf pkt, empty linear data area, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, 0, PULL_MAX);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_PASS, pg_sz, 9000, max_meta_len, 1024, PULL_MAX);
+
+ /* multi-buf pkt, no tailroom, pull requires memmove */
+ run_test(skel, XDP_PASS, pg_sz, 9000, 0, max_data_len, PULL_MAX);
+
+ /* Test cases with invalid pull length */
+
+ /* linear xdp pkt, pull more than total data len */
+ run_test(skel, XDP_DROP, pg_sz, 2048, 0, 2048, 2049);
+
+ /* multi-buf pkt with no space left in linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, empty linear data area */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, 0, PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no headroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, max_meta_len, 1024,
+ PULL_MAX | PULL_PLUS_ONE);
+
+ /* multi-buf pkt, no tailroom */
+ run_test(skel, XDP_DROP, pg_sz, 9000, 0, max_data_len,
+ PULL_MAX | PULL_PLUS_ONE);
+
+out:
+ test_xdp_pull_data__destroy(skel);
+}
+
+void test_xdp_pull_data(void)
+{
+ if (test__start_subtest("xdp_pull_data"))
+ test_xdp_pull_data_basic();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
new file mode 100644
index 000000000000..18dd25344de7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_vlan.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Network topology:
+ * ----------- -----------
+ * | NS1 | | NS2 |
+ * | veth0 -|--------|- veth0 |
+ * ----------- -----------
+ *
+ */
+
+#define _GNU_SOURCE
+#include <net/if.h>
+#include <uapi/linux/if_link.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xdp_vlan.skel.h"
+
+#define VETH_NAME "veth0"
+#define NS_MAX_SIZE 32
+#define NS1_NAME "ns-xdp-vlan-1-"
+#define NS2_NAME "ns-xdp-vlan-2-"
+#define NS1_IP_ADDR "100.64.10.1"
+#define NS2_IP_ADDR "100.64.10.2"
+#define VLAN_ID 4011
+
+static int setup_network(char *ns1, char *ns2)
+{
+ if (!ASSERT_OK(append_tid(ns1, NS_MAX_SIZE), "create ns1 name"))
+ goto fail;
+ if (!ASSERT_OK(append_tid(ns2, NS_MAX_SIZE), "create ns2 name"))
+ goto fail;
+
+ SYS(fail, "ip netns add %s", ns1);
+ SYS(fail, "ip netns add %s", ns2);
+ SYS(fail, "ip -n %s link add %s type veth peer name %s netns %s",
+ ns1, VETH_NAME, VETH_NAME, ns2);
+
+ /* NOTICE: XDP requires the VLAN header inside the packet payload,
+ * so disable the VLAN offloading driver features.
+ */
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns1, VETH_NAME);
+ SYS(fail, "ip netns exec %s ethtool -K %s rxvlan off txvlan off", ns2, VETH_NAME);
+
+ /* NS1 configuration */
+ SYS(fail, "ip -n %s addr add %s/24 dev %s", ns1, NS1_IP_ADDR, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s up", ns1, VETH_NAME);
+
+ /* NS2 configuration */
+ SYS(fail, "ip -n %s link add link %s name %s.%d type vlan id %d",
+ ns2, VETH_NAME, VETH_NAME, VLAN_ID, VLAN_ID);
+ SYS(fail, "ip -n %s addr add %s/24 dev %s.%d", ns2, NS2_IP_ADDR, VETH_NAME, VLAN_ID);
+ SYS(fail, "ip -n %s link set %s up", ns2, VETH_NAME);
+ SYS(fail, "ip -n %s link set %s.%d up", ns2, VETH_NAME, VLAN_ID);
+
+ /* At this point ping should fail because VLAN tags are only used by NS2 */
+ return !SYS_NOFAIL("ip netns exec %s ping -W 1 -c1 %s", ns2, NS1_IP_ADDR);
+
+fail:
+ return -1;
+}
+
+static void cleanup_network(const char *ns1, const char *ns2)
+{
+ SYS_NOFAIL("ip netns del %s", ns1);
+ SYS_NOFAIL("ip netns del %s", ns2);
+}
+
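+/* Attach the given XDP program on veth0 in NS1 and the TC egress program
+ * that pushes the VLAN tag back, then verify connectivity with pings in
+ * both directions.
+ */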
+static void xdp_vlan(struct bpf_program *xdp, struct bpf_program *tc, u32 flags)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ char ns1[NS_MAX_SIZE] = NS1_NAME;
+ char ns2[NS_MAX_SIZE] = NS2_NAME;
+ struct nstoken *nstoken = NULL;
+ int interface;
+ int ret;
+
+ if (!ASSERT_OK(setup_network(ns1, ns2), "setup network"))
+ goto cleanup;
+
+ nstoken = open_netns(ns1);
+ if (!ASSERT_OK_PTR(nstoken, "open NS1"))
+ goto cleanup;
+
+ interface = if_nametoindex(VETH_NAME);
+ if (!ASSERT_NEQ(interface, 0, "get interface index"))
+ goto cleanup;
+
+ ret = bpf_xdp_attach(interface, bpf_program__fd(xdp), flags, NULL);
+ if (!ASSERT_OK(ret, "attach xdp_vlan_change"))
+ goto cleanup;
+
+ tc_hook.ifindex = interface;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto detach_xdp;
+
+ /* Now we'll use BPF programs to pop/push the VLAN tags */
+ tc_opts.prog_fd = bpf_program__fd(tc);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto detach_xdp;
+
+ close_netns(nstoken);
+ nstoken = NULL;
+
+ /* Now the namespaces can reach each other; test with pings */
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns1, NS2_IP_ADDR);
+ SYS(detach_tc, "ip netns exec %s ping -i 0.2 -W 2 -c 2 %s > /dev/null", ns2, NS1_IP_ADDR);
+
+detach_tc:
+ bpf_tc_detach(&tc_hook, &tc_opts);
+detach_xdp:
+ bpf_xdp_detach(interface, flags, NULL);
+cleanup:
+ close_netns(nstoken);
+ cleanup_network(ns1, ns2);
+}
+
+/* First test: remove the VLAN tag by setting VLAN ID 0 with "xdp_vlan_change";
+ * on egress, use TC to add back VLAN tag 4011.
+ */
+void test_xdp_vlan_change(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_change, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
+
+/* Second test: the XDP prog fully removes the VLAN header.
+ *
+ * Catches a kernel bug in generic-XDP that doesn't allow us to remove a
+ * VLAN header, because skb->protocol still contains the VLAN ETH_P_8021Q
+ * indication, and this causes our changes to be overwritten.
+ */
+void test_xdp_vlan_remove(void)
+{
+ struct test_xdp_vlan *skel;
+
+ skel = test_xdp_vlan__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xdp_vlan__open_and_load"))
+ return;
+
+ if (test__start_subtest("0"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push, 0);
+
+ if (test__start_subtest("DRV_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_DRV_MODE);
+
+ if (test__start_subtest("SKB_MODE"))
+ xdp_vlan(skel->progs.xdp_vlan_remove_outer2, skel->progs.tc_vlan_push,
+ XDP_FLAGS_SKB_MODE);
+
+ test_xdp_vlan__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xsk.c b/tools/testing/selftests/bpf/prog_tests/xsk.c
new file mode 100644
index 000000000000..dd4c35c0e428
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xsk.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <stdarg.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_xsk.h"
+#include "xsk_xdp_progs.skel.h"
+
+#define VETH_RX "veth0"
+#define VETH_TX "veth1"
+#define MTU 1500
+
+int setup_veth(bool busy_poll)
+{
+ SYS(fail,
+ "ip link add %s numtxqueues 4 numrxqueues 4 type veth peer name %s numtxqueues 4 numrxqueues 4",
+ VETH_RX, VETH_TX);
+ SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_RX);
+ SYS(fail, "sysctl -wq net.ipv6.conf.%s.disable_ipv6=1", VETH_TX);
+
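+ /* Defer hard IRQs and set a GRO flush timeout; the busy-poll test
+ * mode relies on these NAPI settings being in place on both peers.
+ */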
+ if (busy_poll) {
+ SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_RX);
+ SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_RX);
+ SYS(fail, "echo 2 > /sys/class/net/%s/napi_defer_hard_irqs", VETH_TX);
+ SYS(fail, "echo 200000 > /sys/class/net/%s/gro_flush_timeout", VETH_TX);
+ }
+
+ SYS(fail, "ip link set %s mtu %d", VETH_RX, MTU);
+ SYS(fail, "ip link set %s mtu %d", VETH_TX, MTU);
+ SYS(fail, "ip link set %s up", VETH_RX);
+ SYS(fail, "ip link set %s up", VETH_TX);
+
+ return 0;
+
+fail:
+ return -1;
+}
+
+void delete_veth(void)
+{
+ SYS_NOFAIL("ip link del %s", VETH_RX);
+ SYS_NOFAIL("ip link del %s", VETH_TX);
+}
+
+int configure_ifobj(struct ifobject *tx, struct ifobject *rx)
+{
+ rx->ifindex = if_nametoindex(VETH_RX);
+ if (!ASSERT_OK_FD(rx->ifindex, "get RX ifindex"))
+ return -1;
+
+ tx->ifindex = if_nametoindex(VETH_TX);
+ if (!ASSERT_OK_FD(tx->ifindex, "get TX ifindex"))
+ return -1;
+
+ tx->shared_umem = false;
+ rx->shared_umem = false;
+
+ return 0;
+}
+
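+/* Run a single test spec from the shared tests[] table on the veth pair in
+ * the given mode, then restore the default packet streams and HW ring sizes.
+ */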
+static void test_xsk(const struct test_spec *test_to_run, enum test_mode mode)
+{
+ struct ifobject *ifobj_tx, *ifobj_rx;
+ struct test_spec test;
+ int ret;
+
+ ifobj_tx = ifobject_create();
+ if (!ASSERT_OK_PTR(ifobj_tx, "create ifobj_tx"))
+ return;
+
+ ifobj_rx = ifobject_create();
+ if (!ASSERT_OK_PTR(ifobj_rx, "create ifobj_rx"))
+ goto delete_tx;
+
+ if (!ASSERT_OK(configure_ifobj(ifobj_tx, ifobj_rx), "configure ifobj"))
+ goto delete_rx;
+
+ ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+ if (!ret) {
+ ifobj_tx->hw_ring_size_supp = true;
+ ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+ ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+ }
+
+ if (!ASSERT_OK(init_iface(ifobj_rx, worker_testapp_validate_rx), "init RX"))
+ goto delete_rx;
+ if (!ASSERT_OK(init_iface(ifobj_tx, worker_testapp_validate_tx), "init TX"))
+ goto delete_rx;
+
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+
+ test.tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!ASSERT_OK_PTR(test.tx_pkt_stream_default, "TX pkt generation"))
+ goto delete_rx;
+ test.rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!ASSERT_OK_PTR(test.rx_pkt_stream_default, "RX pkt generation"))
+ goto delete_rx;
+
+ test_init(&test, ifobj_tx, ifobj_rx, mode, test_to_run);
+ ret = test.test_func(&test);
+ if (ret != TEST_SKIP)
+ ASSERT_OK(ret, "Run test");
+ pkt_stream_restore_default(&test);
+
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
+ pkt_stream_delete(test.tx_pkt_stream_default);
+ pkt_stream_delete(test.rx_pkt_stream_default);
+ xsk_xdp_progs__destroy(ifobj_tx->xdp_progs);
+ xsk_xdp_progs__destroy(ifobj_rx->xdp_progs);
+
+delete_rx:
+ ifobject_delete(ifobj_rx);
+delete_tx:
+ ifobject_delete(ifobj_tx);
+}
+
+void test_ns_xsk_skb(void)
+{
+ int i;
+
+ if (!ASSERT_OK(setup_veth(false), "setup veth"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (test__start_subtest(tests[i].name))
+ test_xsk(&tests[i], TEST_MODE_SKB);
+ }
+
+ delete_veth();
+}
+
+void test_ns_xsk_drv(void)
+{
+ int i;
+
+ if (!ASSERT_OK(setup_veth(false), "setup veth"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (test__start_subtest(tests[i].name))
+ test_xsk(&tests[i], TEST_MODE_DRV);
+ }
+
+ delete_veth();
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
index 40dd57fca5cc..d1841aac94a2 100644
--- a/tools/testing/selftests/bpf/progs/arena_atomics.c
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -6,6 +6,8 @@
#include <stdbool.h>
#include <stdatomic.h>
#include "bpf_arena_common.h"
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARENA);
@@ -19,9 +21,18 @@ struct {
} arena SEC(".maps");
#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
-bool skip_tests __attribute((__section__(".data"))) = false;
+bool skip_all_tests __attribute((__section__(".data"))) = false;
#else
-bool skip_tests = true;
+bool skip_all_tests = true;
+#endif
+
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_lacq_srel_tests = true;
#endif
__u32 pid = 0;
@@ -274,4 +285,113 @@ int uaf(const void *ctx)
return 0;
}
+#if __clang_major__ >= 18
+__u8 __arena_global load_acquire8_value = 0x12;
+__u16 __arena_global load_acquire16_value = 0x1234;
+__u32 __arena_global load_acquire32_value = 0x12345678;
+__u64 __arena_global load_acquire64_value = 0x1234567890abcdef;
+
+__u8 __arena_global load_acquire8_result = 0;
+__u16 __arena_global load_acquire16_result = 0;
+__u32 __arena_global load_acquire32_result = 0;
+__u64 __arena_global load_acquire64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global load_acquire8_value;
+__u64 __arena_global load_acquire16_value;
+__u64 __arena_global load_acquire32_value;
+__u64 __arena_global load_acquire64_value;
+
+__u64 __arena_global load_acquire8_result;
+__u64 __arena_global load_acquire16_result;
+__u64 __arena_global load_acquire32_result;
+__u64 __arena_global load_acquire64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int load_acquire(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+
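+/* Cast SRC and DST into the arena address space, execute a BPF load-acquire
+ * from SRC (the .8byte slot is patched with the encoded BPF_LOAD_ACQ
+ * instruction) and store the loaded value to DST.
+ */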
+#define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \
+ { asm volatile ( \
+ "r1 = %[" #SRC "] ll;" \
+ "r1 = addr_space_cast(r1, 0x0, 0x1);" \
+ ".8byte %[load_acquire_insn];" \
+ "r3 = %[" #DST "] ll;" \
+ "r3 = addr_space_cast(r3, 0x0, 0x1);" \
+ "*(" #SIZE " *)(r3 + 0) = r2;" \
+ : \
+ : __imm_addr(SRC), \
+ __imm_insn(load_acquire_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_LOAD_ACQ, \
+ BPF_REG_2, BPF_REG_1, 0)), \
+ __imm_addr(DST) \
+ : __clobber_all); } \
+
+ LOAD_ACQUIRE_ARENA(B, u8, load_acquire8_value, load_acquire8_result)
+ LOAD_ACQUIRE_ARENA(H, u16, load_acquire16_value,
+ load_acquire16_result)
+ LOAD_ACQUIRE_ARENA(W, u32, load_acquire32_value,
+ load_acquire32_result)
+ LOAD_ACQUIRE_ARENA(DW, u64, load_acquire64_value,
+ load_acquire64_result)
+#undef LOAD_ACQUIRE_ARENA
+
+#endif
+ return 0;
+}
+
+#if __clang_major__ >= 18
+__u8 __arena_global store_release8_result = 0;
+__u16 __arena_global store_release16_result = 0;
+__u32 __arena_global store_release32_result = 0;
+__u64 __arena_global store_release64_result = 0;
+#else
+/* clang-17 crashes if the .addr_space.1 ELF section has holes. Work around
+ * this issue by defining the below variables as 64-bit.
+ */
+__u64 __arena_global store_release8_result;
+__u64 __arena_global store_release16_result;
+__u64 __arena_global store_release32_result;
+__u64 __arena_global store_release64_result;
+#endif
+
+SEC("raw_tp/sys_enter")
+int store_release(const void *ctx)
+{
+#if defined(ENABLE_ATOMICS_TESTS) && \
+ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64))
+
+#define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \
+ { asm volatile ( \
+ "r1 = " VAL ";" \
+ "r2 = %[" #DST "] ll;" \
+ "r2 = addr_space_cast(r2, 0x0, 0x1);" \
+ ".8byte %[store_release_insn];" \
+ : \
+ : __imm_addr(DST), \
+ __imm_insn(store_release_insn, \
+ BPF_ATOMIC_OP(BPF_##SIZEOP, BPF_STORE_REL, \
+ BPF_REG_2, BPF_REG_1, 0)) \
+ : __clobber_all); } \
+
+ STORE_RELEASE_ARENA(B, store_release8_result, "0x12")
+ STORE_RELEASE_ARENA(H, store_release16_result, "0x1234")
+ STORE_RELEASE_ARENA(W, store_release32_result, "0x12345678")
+ STORE_RELEASE_ARENA(DW, store_release64_result,
+ "0x1234567890abcdef ll")
+#undef STORE_RELEASE_ARENA
+
+#endif
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_spin_lock.c b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
new file mode 100644
index 000000000000..086b57a426cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_spin_lock.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_arena_spin_lock.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+int cs_count;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+arena_spinlock_t __arena lock;
+int test_skip = 1;
+#else
+int test_skip = 2;
+#endif
+
+int counter;
+int limit;
+
+SEC("tc")
+int prog(void *ctx)
+{
+ int ret = -2;
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ unsigned long flags;
+
+ if ((ret = arena_spin_lock_irqsave(&lock, flags))) {
+ if (ret == -EOPNOTSUPP)
+ test_skip = 3;
+ return ret;
+ }
+ if (counter != limit)
+ counter++;
+ bpf_repeat(cs_count);
+ ret = 0;
+ arena_spin_unlock_irqrestore(&lock, flags);
+#endif
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_strsearch.c b/tools/testing/selftests/bpf/progs/arena_strsearch.c
new file mode 100644
index 000000000000..ef6b76658f7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_strsearch.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_strsearch.h"
+
+struct glob_test {
+ char const __arena *pat, *str;
+ bool expected;
+};
+
+static bool test(char const __arena *pat, char const __arena *str, bool expected)
+{
+ bool match = glob_match(pat, str);
+ bool success = match == expected;
+
+ /* bpf_printk("glob_match %s %s res %d ok %d", pat, str, match, success); */
+ return success;
+}
+
+/*
+ * The tests are all jammed together in one array to make it simpler
+ * to place that array in the arena. The obvious
+ * "array of structures containing char *" has no way to force the
+ * pointed-to strings to be in a particular section.
+ *
+ * Anyway, a test consists of:
+ * 1. Expected glob_match result: '1' or '0'.
+ * 2. Pattern to match: null-terminated string
+ * 3. String to match against: null-terminated string
+ *
+ * The list of tests is terminated with a final '\0' instead of
+ * a glob_match result character.
+ */
+static const char __arena glob_tests[] =
+ /* Some basic tests */
+ "1" "a\0" "a\0"
+ "0" "a\0" "b\0"
+ "0" "a\0" "aa\0"
+ "0" "a\0" "\0"
+ "1" "\0" "\0"
+ "0" "\0" "a\0"
+ /* Simple character class tests */
+ "1" "[a]\0" "a\0"
+ "0" "[a]\0" "b\0"
+ "0" "[!a]\0" "a\0"
+ "1" "[!a]\0" "b\0"
+ "1" "[ab]\0" "a\0"
+ "1" "[ab]\0" "b\0"
+ "0" "[ab]\0" "c\0"
+ "1" "[!ab]\0" "c\0"
+ "1" "[a-c]\0" "b\0"
+ "0" "[a-c]\0" "d\0"
+ /* Corner cases in character class parsing */
+ "1" "[a-c-e-g]\0" "-\0"
+ "0" "[a-c-e-g]\0" "d\0"
+ "1" "[a-c-e-g]\0" "f\0"
+ "1" "[]a-ceg-ik[]\0" "a\0"
+ "1" "[]a-ceg-ik[]\0" "]\0"
+ "1" "[]a-ceg-ik[]\0" "[\0"
+ "1" "[]a-ceg-ik[]\0" "h\0"
+ "0" "[]a-ceg-ik[]\0" "f\0"
+ "0" "[!]a-ceg-ik[]\0" "h\0"
+ "0" "[!]a-ceg-ik[]\0" "]\0"
+ "1" "[!]a-ceg-ik[]\0" "f\0"
+ /* Simple wild cards */
+ "1" "?\0" "a\0"
+ "0" "?\0" "aa\0"
+ "0" "??\0" "a\0"
+ "1" "?x?\0" "axb\0"
+ "0" "?x?\0" "abx\0"
+ "0" "?x?\0" "xab\0"
+ /* Asterisk wild cards (backtracking) */
+ "0" "*??\0" "a\0"
+ "1" "*??\0" "ab\0"
+ "1" "*??\0" "abc\0"
+ "1" "*??\0" "abcd\0"
+ "0" "??*\0" "a\0"
+ "1" "??*\0" "ab\0"
+ "1" "??*\0" "abc\0"
+ "1" "??*\0" "abcd\0"
+ "0" "?*?\0" "a\0"
+ "1" "?*?\0" "ab\0"
+ "1" "?*?\0" "abc\0"
+ "1" "?*?\0" "abcd\0"
+ "1" "*b\0" "b\0"
+ "1" "*b\0" "ab\0"
+ "0" "*b\0" "ba\0"
+ "1" "*b\0" "bb\0"
+ "1" "*b\0" "abb\0"
+ "1" "*b\0" "bab\0"
+ "1" "*bc\0" "abbc\0"
+ "1" "*bc\0" "bc\0"
+ "1" "*bc\0" "bbc\0"
+ "1" "*bc\0" "bcbc\0"
+ /* Multiple asterisks (complex backtracking) */
+ "1" "*ac*\0" "abacadaeafag\0"
+ "1" "*ac*ae*ag*\0" "abacadaeafag\0"
+ "1" "*a*b*[bc]*[ef]*g*\0" "abacadaeafag\0"
+ "0" "*a*b*[ef]*[cd]*g*\0" "abacadaeafag\0"
+ "1" "*abcd*\0" "abcabcabcabcdefg\0"
+ "1" "*ab*cd*\0" "abcabcabcabcdefg\0"
+ "1" "*abcd*abcdef*\0" "abcabcdabcdeabcdefg\0"
+ "0" "*abcd*\0" "abcabcabcabcefg\0"
+ "0" "*ab*cd*\0" "abcabcabcabcefg\0";
+
+bool skip = false;
+
+SEC("syscall")
+int arena_strsearch(void *ctx)
+{
+ unsigned successes = 0;
+ unsigned n = 0;
+ char const __arena *p = glob_tests;
+
+ /*
+ * Tests are jammed together in a string. The first byte is '1'
+ * or '0' to indicate the expected outcome, or '\0' to indicate the
+ * end of the tests. Then come two null-terminated strings: the
+ * pattern and the string to match it against.
+ */
+ while (*p) {
+ bool expected = *p++ & 1;
+ char const __arena *pat = p;
+
+ cond_break;
+ p += bpf_arena_strlen(p) + 1;
+ successes += test(pat, p, expected);
+ p += bpf_arena_strlen(p) + 1;
+ n++;
+ }
+
+ n -= successes;
+ /* bpf_printk("glob: %u self-tests passed, %u failed\n", successes, n); */
+
+ return n ? -1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
index b7e175cd0af0..b3f77b4561c8 100644
--- a/tools/testing/selftests/bpf/progs/bad_struct_ops.c
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
@@ -3,7 +3,7 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
new file mode 100644
index 000000000000..079bf3794b3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_sockmap_prog.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+long process_byte = 0;
+int verdict_dir = 0;
+int dropped = 0;
+int pkt_size = 0;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return pkt_size;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ int one = 1;
+ int ret = bpf_sk_redirect_map(skb, &sock_map_rx, one, verdict_dir);
+
+ if (ret == SK_DROP)
+ dropped++;
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return ret;
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_pass(struct __sk_buff *skb)
+{
+ __sync_fetch_and_add(&process_byte, skb->len);
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ int one = 1;
+
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return bpf_msg_redirect_map(msg, &sock_map_tx, one, verdict_dir);
+}
+
+SEC("sk_msg")
+int prog_skmsg_pass(struct sk_msg_md *msg)
+{
+ __sync_fetch_and_add(&process_byte, msg->size);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
new file mode 100644
index 000000000000..f90531cf3ee5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_arena_spin_lock.h
@@ -0,0 +1,542 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#ifndef BPF_ARENA_SPIN_LOCK_H
+#define BPF_ARENA_SPIN_LOCK_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_atomic.h"
+
+#define arch_mcs_spin_lock_contended_label(l, label) smp_cond_load_acquire_label(l, VAL, label)
+#define arch_mcs_spin_unlock_contended(l) smp_store_release((l), 1)
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+
+#define EBUSY 16
+#define EOPNOTSUPP 95
+#define ETIMEDOUT 110
+
+#ifndef __arena
+#define __arena __attribute__((address_space(1)))
+#endif
+
+extern unsigned long CONFIG_NR_CPUS __kconfig;
+
+/*
+ * Typically, we'd just rely on the definition in vmlinux.h for qspinlock, but
+ * PowerPC overrides the definition to define lock->val as u32 instead of
+ * atomic_t, leading to compilation errors. Import a local definition below so
+ * that we don't depend on the vmlinux.h version.
+ */
+
+struct __qspinlock {
+ union {
+ atomic_t val;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ struct {
+ u8 locked;
+ u8 pending;
+ };
+ struct {
+ u16 locked_pending;
+ u16 tail;
+ };
+#else
+ struct {
+ u16 tail;
+ u16 locked_pending;
+ };
+ struct {
+ u8 reserved[2];
+ u8 pending;
+ u8 locked;
+ };
+#endif
+ };
+};
+
+#define arena_spinlock_t struct __qspinlock
+/* FIXME: Using typedef causes CO-RE relocation error */
+/* typedef struct qspinlock arena_spinlock_t; */
+
+struct arena_mcs_spinlock {
+ struct arena_mcs_spinlock __arena *next;
+ int locked;
+ int count;
+};
+
+struct arena_qnode {
+ struct arena_mcs_spinlock mcs;
+};
+
+#define _Q_MAX_NODES 4
+#define _Q_PENDING_LOOPS 1
+
+/*
+ * Bitfields in the atomic value:
+ *
+ * 0- 7: locked byte
+ * 8: pending
+ * 9-15: not used
+ * 16-17: tail index
+ * 18-31: tail cpu (+1)
+ */
+#define _Q_MAX_CPUS 1024
+
+#define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\
+ << _Q_ ## type ## _OFFSET)
+#define _Q_LOCKED_OFFSET 0
+#define _Q_LOCKED_BITS 8
+#define _Q_LOCKED_MASK _Q_SET_MASK(LOCKED)
+
+#define _Q_PENDING_OFFSET (_Q_LOCKED_OFFSET + _Q_LOCKED_BITS)
+#define _Q_PENDING_BITS 8
+#define _Q_PENDING_MASK _Q_SET_MASK(PENDING)
+
+#define _Q_TAIL_IDX_OFFSET (_Q_PENDING_OFFSET + _Q_PENDING_BITS)
+#define _Q_TAIL_IDX_BITS 2
+#define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX)
+
+#define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS)
+#define _Q_TAIL_CPU_BITS (32 - _Q_TAIL_CPU_OFFSET)
+#define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU)
+
+#define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
+#define _Q_PENDING_VAL (1U << _Q_PENDING_OFFSET)
+
+struct arena_qnode __arena qnodes[_Q_MAX_CPUS][_Q_MAX_NODES];
+
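+/* Pack (cpu, idx) into the tail bits of the lock word. The cpu is stored
+ * +1 so that a tail of 0 means "no tail", e.g. encode_tail(2, 1) =
+ * ((2 + 1) << 18) | (1 << 16) = 0xd0000.
+ */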
+static inline u32 encode_tail(int cpu, int idx)
+{
+ u32 tail;
+
+ tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+ tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+ return tail;
+}
+
+static inline struct arena_mcs_spinlock __arena *decode_tail(u32 tail)
+{
+ u32 cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+ u32 idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+ return &qnodes[cpu][idx].mcs;
+}
+
+static inline
+struct arena_mcs_spinlock __arena *grab_mcs_node(struct arena_mcs_spinlock __arena *base, int idx)
+{
+ return &((struct arena_qnode __arena *)base + idx)->mcs;
+}
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(arena_spinlock_t __arena *lock, u32 tail)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = (old & _Q_LOCKED_PENDING_MASK) | tail;
+ /*
+ * We can use relaxed semantics since the caller ensures that
+ * the MCS node is properly initialized before updating the
+ * tail.
+ *
+ * These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_relaxed(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * clear_pending - clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,* -> *,0,*
+ */
+static __always_inline void clear_pending(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->pending, 0);
+}
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
+}
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(arena_spinlock_t __arena *lock)
+{
+ WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
+}
+
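+/* Atomically set the pending bit with acquire ordering and return the old
+ * lock value, like the kernel's queued_fetch_set_pending_acquire().
+ */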
+static __always_inline
+u32 arena_fetch_set_pending_acquire(arena_spinlock_t __arena *lock)
+{
+ u32 old, new;
+
+ old = atomic_read(&lock->val);
+ do {
+ new = old | _Q_PENDING_VAL;
+ /*
+ * These loops are not expected to stall, but we still need to
+ * prove to the verifier they will terminate eventually.
+ */
+ cond_break_label(out);
+ } while (!atomic_try_cmpxchg_acquire(&lock->val, &old, new));
+
+ return old;
+out:
+ bpf_printk("RUNTIME ERROR: %s unexpected cond_break exit!!!", __func__);
+ return old;
+}
+
+/**
+ * arena_spin_trylock - try to acquire the queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ * Return: 1 if lock acquired, 0 if failed
+ */
+static __always_inline int arena_spin_trylock(arena_spinlock_t __arena *lock)
+{
+ int val = atomic_read(&lock->val);
+
+ if (unlikely(val))
+ return 0;
+
+ return likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL));
+}
+
+__noinline
+int arena_spin_lock_slowpath(arena_spinlock_t __arena __arg_arena *lock, u32 val)
+{
+ struct arena_mcs_spinlock __arena *prev, *next, *node0, *node;
+ int ret = -ETIMEDOUT;
+ u32 old, tail;
+ int idx;
+
+ /*
+ * Wait for in-progress pending->locked hand-overs with a bounded
+ * number of spins so that we guarantee forward progress.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ if (val == _Q_PENDING_VAL) {
+ int cnt = _Q_PENDING_LOOPS;
+ val = atomic_cond_read_relaxed_label(&lock->val,
+ (VAL != _Q_PENDING_VAL) || !cnt--,
+ release_err);
+ }
+
+ /*
+ * If we observe any contention; queue.
+ */
+ if (val & ~_Q_LOCKED_MASK)
+ goto queue;
+
+ /*
+ * trylock || pending
+ *
+ * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock
+ */
+ val = arena_fetch_set_pending_acquire(lock);
+
+ /*
+ * If we observe contention, there is a concurrent locker.
+ *
+ * Undo and queue; our setting of PENDING might have made the
+ * n,0,0 -> 0,0,0 transition fail and it will now be waiting
+ * on @next to become !NULL.
+ */
+ if (unlikely(val & ~_Q_LOCKED_MASK)) {
+
+ /* Undo PENDING if we set it. */
+ if (!(val & _Q_PENDING_MASK))
+ clear_pending(lock);
+
+ goto queue;
+ }
+
+ /*
+ * We're pending, wait for the owner to go away.
+ *
+ * 0,1,1 -> *,1,0
+ *
+ * this wait loop must be a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because not all
+ * clear_pending_set_locked() implementations imply full
+ * barriers.
+ */
+ if (val & _Q_LOCKED_MASK)
+ (void)smp_cond_load_acquire_label(&lock->locked, !VAL, release_err);
+
+ /*
+ * take ownership and clear the pending bit.
+ *
+ * 0,1,0 -> 0,0,1
+ */
+ clear_pending_set_locked(lock);
+ return 0;
+
+ /*
+ * End of pending bit optimistic spinning and beginning of MCS
+ * queuing.
+ */
+queue:
+ node0 = &(qnodes[bpf_get_smp_processor_id()])[0].mcs;
+ idx = node0->count++;
+ tail = encode_tail(bpf_get_smp_processor_id(), idx);
+
+ /*
+ * 4 nodes are allocated based on the assumption that there will not be
+ * nested NMIs taking spinlocks. That may not be true on some
+ * architectures, even though the chance of needing more than 4 nodes
+ * is still extremely unlikely. When that happens, we simply return
+ * an error. The original qspinlock has a trylock fallback in this case.
+ */
+ if (unlikely(idx >= _Q_MAX_NODES)) {
+ ret = -EBUSY;
+ goto release_node_err;
+ }
+
+ node = grab_mcs_node(node0, idx);
+
+ /*
+ * Ensure that we increment the head node->count before initialising
+ * the actual node. If the compiler is kind enough to reorder these
+ * stores, then an IRQ could overwrite our assignments.
+ */
+ barrier();
+
+ node->locked = 0;
+ node->next = NULL;
+
+ /*
+ * We touched a (possibly) cold cacheline in the per-cpu queue node;
+ * attempt the trylock once more in the hope someone let go while we
+ * weren't watching.
+ */
+ if (arena_spin_trylock(lock))
+ goto release;
+
+ /*
+ * Ensure that the initialisation of @node is complete before we
+ * publish the updated tail via xchg_tail() and potentially link
+ * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+ */
+ smp_wmb();
+
+ /*
+ * Publish the updated tail.
+ * We have already touched the queueing cacheline; don't bother with
+ * pending stuff.
+ *
+ * p,*,* -> n,*,*
+ */
+ old = xchg_tail(lock, tail);
+ next = NULL;
+
+ /*
+ * if there was a previous node; link it and wait until reaching the
+ * head of the waitqueue.
+ */
+ if (old & _Q_TAIL_MASK) {
+ prev = decode_tail(old);
+
+ /* Link @node into the waitqueue. */
+ WRITE_ONCE(prev->next, node);
+
+ (void)arch_mcs_spin_lock_contended_label(&node->locked, release_node_err);
+
+ /*
+ * While waiting for the MCS lock, the next pointer may have
+ * been set by another lock waiter. We cannot prefetch here
+ * due to lack of equivalent instruction in BPF ISA.
+ */
+ next = READ_ONCE(node->next);
+ }
+
+ /*
+ * we're at the head of the waitqueue, wait for the owner & pending to
+ * go away.
+ *
+ * *,x,y -> *,0,0
+ *
+ * this wait loop must use a load-acquire such that we match the
+ * store-release that clears the locked bit and create lock
+ * sequentiality; this is because the set_locked() function below
+ * does not imply a full barrier.
+ */
+ val = atomic_cond_read_acquire_label(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK),
+ release_node_err);
+
+ /*
+ * claim the lock:
+ *
+ * n,0,0 -> 0,0,1 : lock, uncontended
+ * *,*,0 -> *,*,1 : lock, contended
+ *
+ * If the queue head is the only one in the queue (lock value == tail)
+ * and nobody is pending, clear the tail code and grab the lock.
+ * Otherwise, we only need to grab the lock.
+ */
+
+ /*
+ * In the PV case we might already have _Q_LOCKED_VAL set, because
+ * of lock stealing; therefore we must also allow:
+ *
+ * n,0,1 -> 0,0,1
+ *
+ * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the
+ * above wait condition, therefore any concurrent setting of
+ * PENDING will make the uncontended transition fail.
+ */
+ if ((val & _Q_TAIL_MASK) == tail) {
+ if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+ goto release; /* No contention */
+ }
+
+ /*
+ * Either somebody is queued behind us or _Q_PENDING_VAL got set
+ * which will then detect the remaining tail and queue behind us
+ * ensuring we'll see a @next.
+ */
+ set_locked(lock);
+
+ /*
+ * contended path; wait for next if not observed yet, release.
+ */
+ if (!next)
+ next = smp_cond_load_relaxed_label(&node->next, (VAL), release_node_err);
+
+ arch_mcs_spin_unlock_contended(&next->locked);
+
+release:;
+ /*
+ * release the node
+ *
+ * Doing a normal dec vs this_cpu_dec is fine. An upper context always
+ * decrements the count it incremented before returning, thus we're fine.
+ * Contexts interrupting us either observe our dec or they don't.
+ * Just ensure the compiler doesn't reorder this statement, as
+ * this_cpu_dec would have implied that ordering.
+ */
+ barrier();
+ node0->count--;
+ return 0;
+release_node_err:
+ barrier();
+ node0->count--;
+ goto release_err;
+release_err:
+ return ret;
+}
+
+/**
+ * arena_spin_lock - acquire a queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * On error, returned value will be negative.
+ * On success, zero is returned.
+ *
+ * The return value _must_ be tested against zero for success,
+ * instead of checking it against a negative value, in order
+ * to pass the BPF verifier.
+ *
+ * The user should do:
+ * if (arena_spin_lock(...) != 0) // failure
+ * or
+ * if (arena_spin_lock(...) == 0) // success
+ * or
+ * if (arena_spin_lock(...)) // failure
+ * or
+ * if (!arena_spin_lock(...)) // success
+ * instead of:
+ * if (arena_spin_lock(...) < 0) // failure
+ *
+ * The return value can still be inspected later.
+ */
+static __always_inline int arena_spin_lock(arena_spinlock_t __arena *lock)
+{
+ int val = 0;
+
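+ /* The statically sized qnode array assumes at most 1024 CPUs. */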
+ if (CONFIG_NR_CPUS > 1024)
+ return -EOPNOTSUPP;
+
+ bpf_preempt_disable();
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
+ return 0;
+
+ val = arena_spin_lock_slowpath(lock, val);
+ /* FIXME: bpf_assert_range(-MAX_ERRNO, 0) once we have it working for all cases. */
+ if (val)
+ bpf_preempt_enable();
+ return val;
+}
+
+/**
+ * arena_spin_unlock - release a queued spinlock
+ * @lock : Pointer to queued spinlock structure
+ */
+static __always_inline void arena_spin_unlock(arena_spinlock_t __arena *lock)
+{
+ /*
+ * unlock() needs release semantics:
+ */
+ smp_store_release(&lock->locked, 0);
+ bpf_preempt_enable();
+}
+
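+/*
+ * IRQ-safe variants: interrupts are disabled before taking the lock and
+ * restored on unlock, or immediately if the lock could not be acquired.
+ */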
+#define arena_spin_lock_irqsave(lock, flags) \
+ ({ \
+ int __ret; \
+ bpf_local_irq_save(&(flags)); \
+ __ret = arena_spin_lock((lock)); \
+ if (__ret) \
+ bpf_local_irq_restore(&(flags)); \
+ (__ret); \
+ })
+
+#define arena_spin_unlock_irqrestore(lock, flags) \
+ ({ \
+ arena_spin_unlock((lock)); \
+ bpf_local_irq_restore(&(flags)); \
+ })
+
+#endif
+
+#endif /* BPF_ARENA_SPIN_LOCK_H */
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
index 1654a530aa3d..9af19dfe4e80 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -22,10 +22,6 @@
#define TCP_PACING_CA_RATIO (120)
#define TCP_REORDERING (12)
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define after(seq2, seq1) before(seq1, seq2)
-
extern void cubictcp_init(struct sock *sk) __ksym;
extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
@@ -34,11 +30,6 @@ extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
-static bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-
static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
return dividend / divisor;
@@ -101,7 +92,7 @@ static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
tp->snd_cwnd = pkts_in_flight + sndcnt;
}
-/* Decide wheather to run the increase function of congestion control. */
+/* Decide whether to run the increase function of congestion control. */
static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
{
if (tcp_sk(sk)->reordering > TCP_REORDERING)
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index f089faa97ae6..46fb2b37d3a7 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -20,13 +20,6 @@
char _license[] SEC("license") = "GPL";
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-static bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-#define after(seq2, seq1) before(seq1, seq2)
extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 7cd73e75f52a..1cc83140849f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-/* WARNING: This implemenation is not necessarily the same
+/* WARNING: This implementation is not necessarily the same
* as the tcp_dctcp.c. The purpose is mainly for testing
* the kernel BPF logic.
*/
@@ -13,16 +13,10 @@
#ifndef EBUSY
#define EBUSY 16
#endif
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
#define min_not_zero(x, y) ({ \
typeof(x) __x = (x); \
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_gotox.c b/tools/testing/selftests/bpf/progs/bpf_gotox.c
new file mode 100644
index 000000000000..216c71b94c64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_gotox.c
@@ -0,0 +1,448 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+__u64 in_user;
+__u64 ret_user;
+
+int pid;
+
+/*
+ * Skip all the tests if the compiler doesn't support indirect jumps.
+ *
+ * If tests are skipped, all functions below are compiled as dummies,
+ * such that the skeleton looks the same and the userspace program
+ * needs no checks other than whether data->skip is set.
+ */
+#ifdef __BPF_FEATURE_GOTOX
+__u64 skip SEC(".data") = 0;
+#else
+__u64 skip = 1;
+#endif
+
+struct simple_ctx {
+ __u64 x;
+};
+
+#ifdef __BPF_FEATURE_GOTOX
+__u64 some_var;
+
+/*
+ * This function adds code which will be replaced by a different
+ * number of instructions by the verifier. This adds additional
+ * stress on testing the insn_array maps corresponding to indirect jumps.
+ */
+static __always_inline void adjust_insns(__u64 x)
+{
+ some_var ^= x + bpf_jiffies64();
+}
+
+SEC("syscall")
+int one_switch(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int one_switch_non_zero_sec_off(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int simple_test_other_sec(struct pt_regs *ctx)
+{
+ __u64 x = in_user;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int two_switches(struct simple_ctx *ctx)
+{
+ switch (ctx->x) {
+ case 0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ switch (ctx->x + !!ret_user) {
+ case 1:
+ adjust_insns(ctx->x + 7);
+ ret_user = 103;
+ break;
+ case 2:
+ adjust_insns(ctx->x + 9);
+ ret_user = 104;
+ break;
+ case 3:
+ adjust_insns(ctx->x + 11);
+ ret_user = 107;
+ break;
+ case 4:
+ adjust_insns(ctx->x + 11);
+ ret_user = 205;
+ break;
+ case 5:
+ adjust_insns(ctx->x + 11);
+ ret_user = 115;
+ break;
+ default:
+ adjust_insns(ctx->x + 177);
+ ret_user = 1019;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int big_jump_table(struct simple_ctx *ctx __attribute__((unused)))
+{
+ const void *const jt[256] = {
+ [0 ... 255] = &&default_label,
+ [0] = &&l0,
+ [11] = &&l11,
+ [27] = &&l27,
+ [31] = &&l31,
+ };
+
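+ /* Mask the index so it always stays within the 256-entry jump table. */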
+ goto *jt[ctx->x & 0xff];
+
+l0:
+ adjust_insns(ctx->x + 1);
+ ret_user = 2;
+ return 0;
+
+l11:
+ adjust_insns(ctx->x + 7);
+ ret_user = 3;
+ return 0;
+
+l27:
+ adjust_insns(ctx->x + 9);
+ ret_user = 4;
+ return 0;
+
+l31:
+ adjust_insns(ctx->x + 11);
+ ret_user = 5;
+ return 0;
+
+default_label:
+ adjust_insns(ctx->x + 177);
+ ret_user = 19;
+ return 0;
+}
+
+SEC("syscall")
+int one_jump_two_maps(struct simple_ctx *ctx __attribute__((unused)))
+{
+ __label__ l1, l2, l3, l4;
+ void *jt1[2] = { &&l1, &&l2 };
+ void *jt2[2] = { &&l3, &&l4 };
+ unsigned int a = ctx->x % 2;
+ unsigned int b = (ctx->x / 2) % 2;
+ volatile int ret = 0;
+
+ if (!(a < 2 && b < 2))
+ return 19;
+
+ if (ctx->x % 2)
+ goto *jt1[a];
+ else
+ goto *jt2[b];
+
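+ /* The labels below fall through intentionally; the final value depends
+ * on which label we jump to.
+ */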
+ l1: ret += 1;
+ l2: ret += 3;
+ l3: ret += 5;
+ l4: ret += 7;
+
+ ret_user = ret;
+ return ret;
+}
+
+SEC("syscall")
+int one_map_two_jumps(struct simple_ctx *ctx __attribute__((unused)))
+{
+ __label__ l1, l2, l3;
+ void *jt[3] = { &&l1, &&l2, &&l3 };
+ unsigned int a = (ctx->x >> 2) & 1;
+ unsigned int b = (ctx->x >> 3) & 1;
+ volatile int ret = 0;
+
+ if (ctx->x % 2)
+ goto *jt[a];
+
+ if (ctx->x % 3)
+ goto *jt[a + b];
+
+ l1: ret += 3;
+ l2: ret += 5;
+ l3: ret += 7;
+
+ ret_user = ret;
+ return ret;
+}
+
+/* Just to introduce some non-zero offsets in .text */
+static __noinline int f0(volatile struct simple_ctx *ctx __arg_ctx)
+{
+ if (ctx)
+ return 1;
+ else
+ return 13;
+}
+
+SEC("syscall") int f1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return f0(ctx);
+}
+
+static __noinline int __static_global(__u64 x)
+{
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int use_static_global1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return __static_global(ctx->x);
+}
+
+SEC("syscall")
+int use_static_global2(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ adjust_insns(ctx->x + 1);
+ return __static_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_static_global_other_sec(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ return __static_global(in_user);
+}
+
+__noinline int __nonstatic_global(__u64 x)
+{
+ switch (x) {
+ case 0:
+ adjust_insns(x + 1);
+ ret_user = 2;
+ break;
+ case 1:
+ adjust_insns(x + 7);
+ ret_user = 3;
+ break;
+ case 2:
+ adjust_insns(x + 9);
+ ret_user = 4;
+ break;
+ case 3:
+ adjust_insns(x + 11);
+ ret_user = 5;
+ break;
+ case 4:
+ adjust_insns(x + 17);
+ ret_user = 7;
+ break;
+ default:
+ adjust_insns(x + 177);
+ ret_user = 19;
+ break;
+ }
+
+ return 0;
+}
+
+SEC("syscall")
+int use_nonstatic_global1(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ return __nonstatic_global(ctx->x);
+}
+
+SEC("syscall")
+int use_nonstatic_global2(struct simple_ctx *ctx)
+{
+ ret_user = 0;
+ adjust_insns(ctx->x + 1);
+ return __nonstatic_global(ctx->x);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int use_nonstatic_global_other_sec(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ return __nonstatic_global(in_user);
+}
+
+#else /* __BPF_FEATURE_GOTOX */
+
+#define SKIP_TEST(TEST_NAME) \
+ SEC("syscall") int TEST_NAME(void *ctx) \
+ { \
+ return 0; \
+ }
+
+SKIP_TEST(one_switch);
+SKIP_TEST(one_switch_non_zero_sec_off);
+SKIP_TEST(simple_test_other_sec);
+SKIP_TEST(two_switches);
+SKIP_TEST(big_jump_table);
+SKIP_TEST(one_jump_two_maps);
+SKIP_TEST(one_map_two_jumps);
+SKIP_TEST(use_static_global1);
+SKIP_TEST(use_static_global2);
+SKIP_TEST(use_static_global_other_sec);
+SKIP_TEST(use_nonstatic_global1);
+SKIP_TEST(use_nonstatic_global2);
+SKIP_TEST(use_nonstatic_global_other_sec);
+
+#endif /* __BPF_FEATURE_GOTOX */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
new file mode 100644
index 000000000000..2f20485e0de3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_map_elem.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
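+/* Accumulate the first 32 bits of every element's value into value_sum. */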
+SEC("iter/bpf_map_elem")
+int dump_bpf_map_values(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 value = 0;
+
+ if (ctx->value == (void *)0)
+ return 0;
+
+ bpf_probe_read_kernel(&value, sizeof(value), ctx->value);
+ value_sum += value;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
index 774d4dbe8189..a8aa5a71d846 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -18,23 +18,10 @@
unsigned short reuse_listen_hport = 0;
unsigned short listen_hport = 0;
-char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+const char cubic_cc[] = "bpf_cubic";
char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
bool random_retry = false;
-static bool tcp_cc_eq(const char *a, const char *b)
-{
- int i;
-
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (a[i] != b[i])
- return false;
- if (!a[i])
- break;
- }
-
- return true;
-}
SEC("iter/tcp")
int change_tcp_cc(struct bpf_iter__tcp *ctx)
@@ -58,7 +45,7 @@ int change_tcp_cc(struct bpf_iter__tcp *ctx)
cur_cc, sizeof(cur_cc)))
return 0;
- if (!tcp_cc_eq(cur_cc, cubic_cc))
+ if (bpf_strncmp(cur_cc, TCP_CA_NAME_MAX, cubic_cc))
return 0;
if (random_retry && bpf_get_prandom_u32() % 4 == 1)
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
index bc10c4e4b4fa..966ee5a7b066 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -9,6 +9,13 @@ char _license[] SEC("license") = "GPL";
uint32_t tid = 0;
int num_unknown_tid = 0;
int num_known_tid = 0;
+void *user_ptr = 0;
+void *user_ptr_long = 0;
+uint32_t pid = 0;
+
+static char big_str1[5000];
+static char big_str2[5005];
+static char big_str3[4996];
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
@@ -35,7 +42,9 @@ int dump_task(struct bpf_iter__task *ctx)
}
int num_expected_failure_copy_from_user_task = 0;
+int num_expected_failure_copy_from_user_task_str = 0;
int num_success_copy_from_user_task = 0;
+int num_success_copy_from_user_task_str = 0;
SEC("iter.s/task")
int dump_task_sleepable(struct bpf_iter__task *ctx)
@@ -44,6 +53,9 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
struct task_struct *task = ctx->task;
static const char info[] = " === END ===";
struct pt_regs *regs;
+ char task_str1[10] = "aaaaaaaaaa";
+ char task_str2[10], task_str3[10];
+ char task_str4[20] = "aaaaaaaaaaaaaaaaaaaa";
void *ptr;
uint32_t user_data = 0;
int ret;
@@ -78,8 +90,106 @@ int dump_task_sleepable(struct bpf_iter__task *ctx)
BPF_SEQ_PRINTF(seq, "%s\n", info);
return 0;
}
+
++num_success_copy_from_user_task;
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1), ptr, task, 0);
+ if (ret >= 0 || task_str1[9] != 'a' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get error with pad zeros flag */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task_str((char *)task_str1, sizeof(task_str1),
+ ptr, task, BPF_F_PAD_ZEROS);
+ if (ret >= 0 || task_str1[9] != '\0' || task_str1[0] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_expected_failure_copy_from_user_task_str;
+
+ /* Same length as the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str2, 10, user_ptr, task, 0);
+ /* only need to do the task pid check once */
+ if (bpf_strncmp(task_str2, 10, "test_data\0") != 0 || ret != 10 || task->tgid != pid) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str3, 2, user_ptr, task, 0);
+ if (bpf_strncmp(task_str3, 2, "t\0") != 0 || ret != 2) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, 0);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string with pad zeros flag */
+ ret = bpf_copy_from_user_task_str((char *)task_str4, 20, user_ptr, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(task_str4, 10, "test_data\0") != 0 || ret != 10
+ || task_str4[sizeof(task_str4) - 1] != '\0') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Longer length than the string past a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr, task, 0);
+ if (bpf_strncmp(big_str1, 10, "test_data\0") != 0 || ret != 10) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* String that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str1, 5000, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str1, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str1 + 4996, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ for (int i = 0; i < 4999; ++i) {
+ if (i % 2 == 0) {
+ if (big_str1[i] != 'b') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ } else {
+ if (big_str1[i] != 'a') {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ }
+ }
+
+ /* Longer length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str2, 5005, user_ptr_long, task, BPF_F_PAD_ZEROS);
+ if (bpf_strncmp(big_str2, 4, "baba") != 0 || ret != 5000
+ || bpf_strncmp(big_str2 + 4996, 5, "bab\0\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Shorter length than the string that crosses a page boundary */
+ ret = bpf_copy_from_user_task_str(big_str3, 4996, user_ptr_long, task, 0);
+ if (bpf_strncmp(big_str3, 4, "baba") != 0 || ret != 4996
+ || bpf_strncmp(big_str3 + 4992, 4, "bab\0") != 0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ ++num_success_copy_from_user_task_str;
+
if (ctx->meta->seq_num == 0)
BPF_SEQ_PRINTF(seq, " tgid gid data\n");
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
index d22449c69363..b1e509b231cd 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -99,13 +99,13 @@ static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
index 8b072666f9d9..dbc7166aee91 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -99,13 +99,13 @@ static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
timer_active = 1;
- timer_expires = icsk->icsk_timeout;
+ timer_expires = sp->tcp_retransmit_timer.expires;
} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
timer_active = 4;
- timer_expires = icsk->icsk_timeout;
- } else if (timer_pending(&sp->sk_timer)) {
+ timer_expires = sp->tcp_retransmit_timer.expires;
+ } else if (timer_pending(&icsk->icsk_keepalive_timer)) {
timer_active = 2;
- timer_expires = sp->sk_timer.expires;
+ timer_expires = icsk->icsk_keepalive_timer.expires;
} else {
timer_active = 0;
timer_expires = bpf_jiffies64();
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
index ffbd4b116d17..23b2aa2604de 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -64,7 +64,8 @@ int dump_udp4(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
index 47ff7754f4fd..c48b05aa2a4b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -72,7 +72,7 @@ int dump_udp6(struct bpf_iter__udp *ctx)
0, 0L, 0, ctx->uid, 0,
sock_i_ino(&inet->sk),
inet->sk.sk_refcnt.refs.counter, udp_sk,
- inet->sk.sk_drops.counter);
-
+ udp_sk->drop_counters.drops0.counter +
+ udp_sk->drop_counters.drops1.counter);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index eccaf955e394..c9bfbe1bafc1 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -5,6 +5,10 @@
#define XSTR(s) STR(s)
#define STR(s) #s
+/* Expand a macro and then stringize the expansion */
+#define QUOTE(str) #str
+#define EXPAND_QUOTE(str) QUOTE(str)
+
/* This set of attributes controls behavior of the
* test_loader.c:test_loader__run_subtests().
*
@@ -29,7 +33,20 @@
* e.g. "foo{{[0-9]+}}" matches strings like "foo007".
* Extended POSIX regular expression syntax is allowed
* inside the brackets.
+ * __not_msg Message not expected to be found in verifier log.
+ * If __not_msg is situated between __msg tags, the
+ * framework matches the __msg tags first, and then
+ * checks that the __not_msg text is not present in the
+ * portion of the log between the bracketing __msg tags.
+ * Same regex syntax as for __msg is supported.
* __msg_unpriv Same as __msg but for unprivileged mode.
+ * __not_msg_unpriv Same as __not_msg but for unprivileged mode.
+ *
+ * __stderr Message expected to be found in the BPF stderr stream. The
+ * same regex rules apply as for __msg.
+ * __stderr_unpriv Same as __stderr but for unprivileged mode.
+ * __stdout Same as __stderr but for the stdout stream.
+ * __stdout_unpriv Same as __stdout but for unprivileged mode.
*
* __xlated Expect a line in a disassembly log after verifier applies rewrites.
* Multiple __xlated attributes could be specified.
@@ -79,9 +96,11 @@
* expect return value to match passed parameter:
* - a decimal number
* - a hexadecimal number, when starts from 0x
- * - literal INT_MIN
- * - literal POINTER_VALUE (see definition below)
- * - literal TEST_DATA_LEN (see definition below)
+ * - a macro which expands to one of the above
+ * - literal _INT_MIN (expands to INT_MIN)
+ * In addition, two special macros are defined below:
+ * - POINTER_VALUE
+ * - TEST_DATA_LEN
* __retval_unpriv Same, but load program in unprivileged mode.
*
* __description Text to be used instead of a program name for display
@@ -106,22 +125,28 @@
* __arch_* Specify on which architecture the test case should be tested.
* Several __arch_* annotations could be specified at once.
* When test case is not run on current arch it is marked as skipped.
+ * __caps_unpriv Specify the capabilities that should be set when running the test.
+ *
+ * __linear_size Specify the size of the linear area of non-linear skbs, or
+ * 0 for linear skbs.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg=" XSTR(__COUNTER__) "=" msg)))
#define __xlated(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated=" XSTR(__COUNTER__) "=" msg)))
#define __jited(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __not_msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_not_msg_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __xlated_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_xlated_unpriv=" XSTR(__COUNTER__) "=" msg)))
#define __jited_unpriv(msg) __attribute__((btf_decl_tag("comment:test_jited=" XSTR(__COUNTER__) "=" msg)))
#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
-#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
-#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="XSTR(val))))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="XSTR(val))))
#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
@@ -129,6 +154,21 @@
#define __arch_x86_64 __arch("X86_64")
#define __arch_arm64 __arch("ARM64")
#define __arch_riscv64 __arch("RISCV64")
+#define __arch_s390x __arch("s390x")
+#define __caps_unpriv(caps) __attribute__((btf_decl_tag("comment:test_caps_unpriv=" EXPAND_QUOTE(caps))))
+#define __load_if_JITed() __attribute__((btf_decl_tag("comment:load_mode=jited")))
+#define __load_if_no_JITed() __attribute__((btf_decl_tag("comment:load_mode=no_jited")))
+#define __stderr(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr=" XSTR(__COUNTER__) "=" msg)))
+#define __stderr_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stderr_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout=" XSTR(__COUNTER__) "=" msg)))
+#define __stdout_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_stdout_unpriv=" XSTR(__COUNTER__) "=" msg)))
+#define __linear_size(sz) __attribute__((btf_decl_tag("comment:test_linear_size=" XSTR(sz))))
+
+/* Define common capabilities tested using __caps_unpriv */
+#define CAP_NET_ADMIN 12
+#define CAP_SYS_ADMIN 21
+#define CAP_PERFMON 38
+#define CAP_BPF 39
/* Convenience macro for use with 'asm volatile' blocks */
#define __naked __attribute__((naked))
@@ -140,8 +180,12 @@
#define __imm_ptr(name) [name]"r"(&name)
#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
/* Magic constants used with __retval() */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifndef __used
@@ -160,6 +204,9 @@
#elif defined(__TARGET_ARCH_riscv)
#define SYSCALL_WRAPPER 1
#define SYS_PREFIX "__riscv_"
+#elif defined(__TARGET_ARCH_powerpc)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX ""
#else
#define SYSCALL_WRAPPER 0
#define SYS_PREFIX "__se_"
@@ -196,4 +243,31 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+#define CAN_USE_GOTOL
+#endif
+
+#if __clang_major__ >= 18
+#define CAN_USE_BPF_ST
+#endif
+
+#if __clang_major__ >= 18 && defined(ENABLE_ATOMICS_TESTS) && \
+ (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) || \
+ (defined(__TARGET_ARCH_powerpc))
+#define CAN_USE_LOAD_ACQ_STORE_REL
+#endif
+
+#if defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)
+#define SPEC_V1
+#endif
+
+#if defined(__TARGET_ARCH_x86)
+#define SPEC_V4
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS 0x00
+#define NET_XMIT_DROP 0x01 /* skb dropped */
+#define NET_XMIT_CN 0x02 /* congestion notification */
+
+#define TC_PRIO_CONTROL 7
+#define TC_PRIO_MAX 15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
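+/* Local helpers mirroring the kernel's qdisc_skb_cb() and qdisc_pkt_len(). */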
+static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+ return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+ return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+ .enqueue = (void *)bpf_qdisc_test_enqueue,
+ .dequeue = (void *)bpf_qdisc_test_dequeue,
+ .reset = (void *)bpf_qdisc_test_reset,
+ .destroy = (void *)bpf_qdisc_test_destroy,
+ .id = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
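+/* Each queued skb is held as a kptr inside a node of a global BPF linked list. */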
+struct skb_node {
+ struct sk_buff __kptr * skb;
+ struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct skb_node *skbn;
+ u32 pkt_len;
+
+ if (sch->q.qlen == sch->limit)
+ goto drop;
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn)
+ goto drop;
+
+ pkt_len = qdisc_pkt_len(skb);
+
+ sch->q.qlen++;
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&q_fifo_lock);
+ bpf_list_push_back(&q_fifo, &skbn->node);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ sch->qstats.backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+ if (!node)
+ return NULL;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+ if (!skb)
+ return NULL;
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ sch->limit = 1000;
+ init_called = true;
+ return 0;
+}
+
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct skb_node *skbn;
+ int i;
+
+ bpf_for(i, 0, sch->q.qlen) {
+ struct sk_buff *skb = NULL;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ if (!node)
+ break;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_kfree_skb(skb);
+ bpf_obj_drop(skbn);
+ }
+ sch->q.qlen = 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+ .enqueue = (void *)bpf_fifo_enqueue,
+ .dequeue = (void *)bpf_fifo_dequeue,
+ .init = (void *)bpf_fifo_init,
+ .reset = (void *)bpf_fifo_reset,
+ .destroy = (void *)bpf_fifo_destroy,
+ .id = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structure such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ * tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ * tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ * bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ * ...
+ * .id = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+struct fq_bpf_data {
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_refill_delay;
+ u32 flow_plimit;
+ u64 horizon;
+ u32 orphan_mask;
+ u32 timer_slack;
+ u64 time_next_delayed_flow;
+ u64 unthrottle_latency_ns;
+ u8 horizon_drop;
+ u32 new_flow_cnt;
+ u32 old_flow_cnt;
+ u64 ktime_cache;
+};
+
+enum {
+ CLS_RET_PRIO = 0,
+ CLS_RET_NONPRIO = 1,
+ CLS_RET_ERR = 2,
+};
+
+struct skb_node {
+ u64 tstamp;
+ struct sk_buff __kptr * skb;
+ struct bpf_rb_node node;
+};
+
+struct fq_flow_node {
+ int credit;
+ u32 qlen;
+ u64 age;
+ u64 time_next_packet;
+ struct bpf_list_node list_node;
+ struct bpf_rb_node rb_node;
+ struct bpf_rb_root queue __contains(skb_node, node);
+ struct bpf_spin_lock lock;
+ struct bpf_refcount refcount;
+};
+
+struct dequeue_nonprio_ctx {
+ bool stop_iter;
+ u64 expire;
+ u64 now;
+};
+
+struct remove_flows_ctx {
+ bool gc_only;
+ u32 reset_cnt;
+ u32 reset_max;
+};
+
+struct unset_throttled_flows_ctx {
+ bool unset_all;
+ u64 now;
+};
+
+struct fq_stashed_flow {
+ struct fq_flow_node __kptr * flow;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+ void *ret;
+
+ ret = bpf_kptr_xchg(map_val, ptr);
+ if (ret)
+ bpf_obj_drop(ret);
+}
+
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct skb_node *skbn_a;
+ struct skb_node *skbn_b;
+
+ skbn_a = container_of(a, struct skb_node, node);
+ skbn_b = container_of(b, struct skb_node, node);
+
+ return skbn_a->tstamp < skbn_b->tstamp;
+}
+
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct fq_flow_node *flow_a;
+ struct fq_flow_node *flow_b;
+
+ flow_a = container_of(a, struct fq_flow_node, rb_node);
+ flow_b = container_of(b, struct fq_flow_node, rb_node);
+
+ return flow_a->time_next_packet < flow_b->time_next_packet;
+}
+
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct bpf_list_node **node, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ *node = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ *flow_cnt -= 1;
+}
+
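+/* There is no list peek kfunc, so emptiness is checked by popping the front
+ * node and pushing it right back.
+ */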
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+ struct bpf_list_node *node;
+
+ bpf_spin_lock(lock);
+ node = bpf_list_pop_front(head);
+ if (node) {
+ bpf_list_push_front(head, node);
+ bpf_spin_unlock(lock);
+ return false;
+ }
+ bpf_spin_unlock(lock);
+
+ return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+ flow->age = bpf_jiffies64();
+}
+
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+ return flow->age != 0 && flow->age != ~0ULL;
+}
+
+static bool sk_listener(struct sock *sk)
+{
+ return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+ struct fq_stashed_flow tmp = {};
+ struct fq_flow_node *flow;
+ int ret;
+
+ flow = bpf_obj_new(typeof(*flow));
+ if (!flow)
+ return -ENOMEM;
+
+ flow->credit = q.initial_quantum;
+ flow->qlen = 0;
+ flow->age = 1;
+ flow->time_next_packet = 0;
+
+ ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+ if (ret == -ENOMEM || ret == -E2BIG) {
+ fq_gc();
+ bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0);
+ }
+
+ *sflow = bpf_map_lookup_elem(flow_map, &hash);
+ if (!*sflow) {
+ bpf_obj_drop(flow);
+ return -ENOMEM;
+ }
+
+ bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+ return 0;
+}
+
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+ struct sock *sk = skb->sk;
+ int ret = CLS_RET_NONPRIO;
+ u64 hash = 0;
+
+ if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+ *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ ret = CLS_RET_PRIO;
+ } else {
+ if (!sk || sk_listener(sk)) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ /* Avoid collision with an existing flow hash, which
+ * only uses the lower 32 bits of hash, by setting the
+ * upper half of hash to 1.
+ */
+ hash |= (1ULL << 32);
+ } else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ hash |= (1ULL << 32);
+ } else {
+ hash = sk->__sk_common.skc_hash;
+ }
+ *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+ }
+
+ if (!*sflow)
+ ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+ CLS_RET_ERR : CLS_RET_NONPRIO;
+
+ return ret;
+}
+
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+ return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon);
+}
+
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct fq_flow_node *flow = NULL, *flow_copy;
+ struct fq_stashed_flow *sflow;
+ u64 time_to_send, jiffies;
+ struct skb_node *skbn;
+ int ret;
+
+ if (sch->q.qlen >= sch->limit)
+ goto drop;
+
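+ /* Untimestamped packets are sent now; timestamps beyond the horizon are
+ * either dropped or capped to the horizon, depending on horizon_drop.
+ */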
+ if (!skb->tstamp) {
+ time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+ } else {
+ if (fq_packet_beyond_horizon(skb)) {
+ q.ktime_cache = bpf_ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb)) {
+ if (q.horizon_drop)
+ goto drop;
+
+ skb->tstamp = q.ktime_cache + q.horizon;
+ }
+ }
+ time_to_send = skb->tstamp;
+ }
+
+ ret = fq_classify(skb, &sflow);
+ if (ret == CLS_RET_ERR)
+ goto drop;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ goto drop;
+
+ if (ret == CLS_RET_NONPRIO) {
+ if (flow->qlen >= q.flow_plimit) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ if (fq_flow_is_detached(flow)) {
+ flow_copy = bpf_refcount_acquire(flow);
+
+ jiffies = bpf_jiffies64();
+ if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+ if (flow_copy->credit < q.quantum)
+ flow_copy->credit = q.quantum;
+ }
+ flow_copy->age = 0;
+ fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+ &q.new_flow_cnt);
+ }
+ }
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ skbn->tstamp = skb->tstamp = time_to_send;
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&flow->lock);
+ bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+ bpf_spin_unlock(&flow->lock);
+
+ flow->qlen++;
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+ struct bpf_rb_node *node = NULL;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_delayed_lock);
+
+ node = bpf_rbtree_first(&fq_delayed);
+ if (!node) {
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+ q.time_next_delayed_flow = flow->time_next_packet;
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+ bpf_spin_unlock(&fq_delayed_lock);
+
+ if (!node)
+ return 1;
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ flow->age = 0;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+ return 0;
+}
+
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+ flow->age = ~0ULL;
+
+ if (q.time_next_delayed_flow > flow->time_next_packet)
+ q.time_next_delayed_flow = flow->time_next_packet;
+
+ bpf_spin_lock(&fq_delayed_lock);
+ bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+ bpf_spin_unlock(&fq_delayed_lock);
+}
+
+static void fq_check_throttled(u64 now)
+{
+ struct unset_throttled_flows_ctx ctx = {
+ .unset_all = false,
+ .now = now,
+ };
+ unsigned long sample;
+
+ if (q.time_next_delayed_flow > now)
+ return;
+
+ sample = (unsigned long)(now - q.time_next_delayed_flow);
+ q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+ q.unthrottle_latency_ns += sample >> 3;
+
+ q.time_next_delayed_flow = ~0ULL;
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+ u64 time_next_packet, time_to_send;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct bpf_list_head *head;
+ struct bpf_list_node *node;
+ struct bpf_spin_lock *lock;
+ struct fq_flow_node *flow;
+ struct skb_node *skbn;
+ bool is_empty;
+ u32 *cnt;
+
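+ /* Serve new flows first, then old flows; if neither list has entries,
+ * only throttled flows (if any) remain.
+ */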
+ if (q.new_flow_cnt) {
+ head = &fq_new_flows;
+ lock = &fq_new_flows_lock;
+ cnt = &q.new_flow_cnt;
+ } else if (q.old_flow_cnt) {
+ head = &fq_old_flows;
+ lock = &fq_old_flows_lock;
+ cnt = &q.old_flow_cnt;
+ } else {
+ if (q.time_next_delayed_flow != ~0ULL)
+ ctx->expire = q.time_next_delayed_flow;
+ goto break_loop;
+ }
+
+ fq_flows_remove_front(head, lock, &node, cnt);
+ if (!node)
+ goto break_loop;
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ if (flow->credit <= 0) {
+ flow->credit += q.quantum;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ return NULL;
+ }
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+ if (head == &fq_new_flows && !is_empty) {
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ } else {
+ fq_flow_set_detached(flow);
+ bpf_obj_drop(flow);
+ }
+ return NULL;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ time_to_send = skbn->tstamp;
+
+ time_next_packet = (time_to_send > flow->time_next_packet) ?
+ time_to_send : flow->time_next_packet;
+ if (ctx->now < time_next_packet) {
+ bpf_spin_unlock(&flow->lock);
+ flow->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(flow);
+ return NULL;
+ }
+
+ rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto add_flow_and_break;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+ if (!skb)
+ goto add_flow_and_break;
+
+ flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+ flow->qlen--;
+
+add_flow_and_break:
+ fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+ ctx->stop_iter = true;
+ return skb;
+}
+
+static struct sk_buff *fq_dequeue_prio(void)
+{
+ struct fq_flow_node *flow = NULL;
+ struct fq_stashed_flow *sflow;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+ u64 hash = 0;
+
+ sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ if (!sflow)
+ return NULL;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ return NULL;
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ goto out;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto out;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+out:
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+ struct dequeue_nonprio_ctx cb_ctx = {};
+ struct sk_buff *skb = NULL;
+ int i;
+
+ if (!sch->q.qlen)
+ goto out;
+
+ skb = fq_dequeue_prio();
+ if (skb)
+ goto dequeue;
+
+ q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+ fq_check_throttled(q.ktime_cache);
+ bpf_for(i, 0, sch->limit) {
+ skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+ if (cb_ctx.stop_iter)
+ break;
+ };
+
+ if (skb) {
+dequeue:
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ return skb;
+ }
+
+ if (cb_ctx.expire)
+ bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+ return NULL;
+}
+
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+ struct bpf_list_node *node;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_new_flows_lock);
+ node = bpf_list_pop_front(&fq_new_flows);
+ bpf_spin_unlock(&fq_new_flows_lock);
+ if (!node) {
+ bpf_spin_lock(&fq_old_flows_lock);
+ node = bpf_list_pop_front(&fq_old_flows);
+ bpf_spin_unlock(&fq_old_flows_lock);
+ if (!node)
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ bpf_obj_drop(flow);
+
+ return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+ u64 jiffies = bpf_jiffies64();
+
+ return fq_flow_is_detached(flow) &&
+ ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0);
+}
+
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+ struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+ if (sflow->flow &&
+ (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+ bpf_map_delete_elem(flow_map, hash);
+ ctx->reset_cnt++;
+ }
+
+ return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+static void fq_gc(void)
+{
+ struct remove_flows_ctx cb_ctx = {
+ .gc_only = true,
+ .reset_cnt = 0,
+ .reset_max = FQ_GC_MAX,
+ };
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+ struct unset_throttled_flows_ctx utf_ctx = {
+ .unset_all = true,
+ };
+ struct remove_flows_ctx rf_ctx = {
+ .gc_only = false,
+ .reset_cnt = 0,
+ .reset_max = NUM_QUEUE,
+ };
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+ rf_ctx.reset_cnt = 0;
+ bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+ fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+ bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = sch->dev_queue->dev;
+ u32 psched_mtu = dev->mtu + dev->hard_header_len;
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+ return -ENOMEM;
+
+ sch->limit = 10000;
+ q.initial_quantum = 10 * psched_mtu;
+ q.quantum = 2 * psched_mtu;
+ q.flow_refill_delay = 40;
+ q.flow_plimit = 100;
+ q.horizon = 10ULL * NSEC_PER_SEC;
+ q.horizon_drop = 1;
+ q.orphan_mask = 1024 - 1;
+ q.timer_slack = 10 * NSEC_PER_USEC;
+ q.time_next_delayed_flow = ~0ULL;
+ q.unthrottle_latency_ns = 0ULL;
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ return 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+ .enqueue = (void *)bpf_fq_enqueue,
+ .dequeue = (void *)bpf_fq_dequeue,
+ .reset = (void *)bpf_fq_reset,
+ .init = (void *)bpf_fq_init,
+ .destroy = (void *)bpf_fq_destroy,
+ .id = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_smc.c b/tools/testing/selftests/bpf/progs/bpf_smc.c
new file mode 100644
index 000000000000..70d8b08f5914
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_smc.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+enum {
+ BPF_SMC_LISTEN = 10,
+};
+
+struct smc_sock___local {
+ struct sock sk;
+ struct smc_sock *listen_smc;
+ bool use_fallback;
+} __attribute__((preserve_access_index));
+
+int smc_cnt = 0;
+int fallback_cnt = 0;
+
+SEC("fentry/smc_release")
+int BPF_PROG(bpf_smc_release, struct socket *sock)
+{
+ /* only count from one side (client) */
+ if (sock->sk->__sk_common.skc_state == BPF_SMC_LISTEN)
+ return 0;
+ smc_cnt++;
+ return 0;
+}
+
+SEC("fentry/smc_switch_to_fallback")
+int BPF_PROG(bpf_smc_switch_to_fallback, struct smc_sock___local *smc)
+{
+ /* only count from one side (client) */
+ if (smc && !smc->listen_smc)
+ fallback_cnt++;
+ return 0;
+}
+
+/* go with the default value if no strategy was found */
+bool default_ip_strat_value = true;
+
+struct smc_policy_ip_key {
+ __u32 sip;
+ __u32 dip;
+};
+
+struct smc_policy_ip_value {
+ __u8 mode;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, sizeof(struct smc_policy_ip_key));
+ __uint(value_size, sizeof(struct smc_policy_ip_value));
+ __uint(max_entries, 128);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} smc_policy_ip SEC(".maps");
+
+static bool smc_check(__u32 src, __u32 dst)
+{
+ struct smc_policy_ip_value *value;
+ struct smc_policy_ip_key key = {
+ .sip = src,
+ .dip = dst,
+ };
+
+ value = bpf_map_lookup_elem(&smc_policy_ip, &key);
+ return value ? value->mode : default_ip_strat_value;
+}
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(smc_run, int family, int type, int protocol)
+{
+ struct task_struct *task;
+
+ if (family != AF_INET && family != AF_INET6)
+ return protocol;
+
+ if ((type & 0xf) != SOCK_STREAM)
+ return protocol;
+
+ if (protocol != 0 && protocol != IPPROTO_TCP)
+ return protocol;
+
+ task = bpf_get_current_task_btf();
+ /* Prevent this from affecting other tests */
+ if (!task || !task->nsproxy->net_ns->smc.hs_ctrl)
+ return protocol;
+
+ return IPPROTO_SMC;
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option_cond, const struct tcp_sock *tp,
+ struct inet_request_sock *ireq)
+{
+ return smc_check(ireq->req.__req_common.skc_daddr,
+ ireq->req.__req_common.skc_rcv_saddr);
+}
+
+SEC("struct_ops")
+int BPF_PROG(bpf_smc_set_tcp_option, struct tcp_sock *tp)
+{
+ return smc_check(tp->inet_conn.icsk_inet.sk.__sk_common.skc_rcv_saddr,
+ tp->inet_conn.icsk_inet.sk.__sk_common.skc_daddr);
+}
+
+SEC(".struct_ops")
+struct smc_hs_ctrl linkcheck = {
+ .name = "linkcheck",
+ .syn_option = (void *)bpf_smc_set_tcp_option,
+ .synack_option = (void *)bpf_smc_set_tcp_option_cond,
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
new file mode 100644
index 000000000000..f4e67b492dd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_TEST_UTILS_H__
+#define __BPF_TEST_UTILS_H__
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Clobber as many native registers and stack slots as possible. */
+static __always_inline void clobber_regs_stack(void)
+{
+ char tmp_str[] = "123456789";
+ unsigned long tmp;
+
+ bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp);
+ __sink(tmp);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 59843b430f76..d8dacef37c16 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -15,7 +15,11 @@
#define SO_KEEPALIVE 9
#define SO_PRIORITY 12
#define SO_REUSEPORT 15
+#if defined(__TARGET_ARCH_powerpc)
+#define SO_RCVLOWAT 16
+#else
#define SO_RCVLOWAT 18
+#endif
#define SO_BINDTODEVICE 25
#define SO_MARK 36
#define SO_MAX_PACING_RATE 47
@@ -49,6 +53,7 @@
#define TCP_SAVED_SYN 28
#define TCP_CA_NAME_MAX 16
#define TCP_NAGLE_OFF 1
+#define TCP_RTO_MAX_MS 44
#define TCP_ECN_OK 1
#define TCP_ECN_QUEUE_CWR 2
@@ -123,6 +128,7 @@
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
#define sk_net __sk_common.skc_net
+#define sk_rcv_saddr __sk_common.skc_rcv_saddr
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
@@ -140,6 +146,20 @@
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+static inline bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1 - seq2) < 0;
+}
+
+#define after(seq2, seq1) before(seq1, seq2)
+
static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
{
return (struct inet_connection_sock *)sk;
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
new file mode 100644
index 000000000000..21a560427b10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_signed_arr_elem_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_arrays___err_bad_signed_arr_elem_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
index 38f78d9345de..d93f68024cc6 100644
--- a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -30,36 +30,42 @@ int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
*
- * struct cgroup_rstat_cpu {
+ * struct css_rstat_cpu {
* ...
- * struct cgroup *updated_children;
+ * struct cgroup_subsys_state *updated_children;
* ...
* };
*
- * struct cgroup {
+ * struct cgroup_subsys_state {
+ * ...
+ * struct css_rstat_cpu __percpu *rstat_cpu;
* ...
- * struct cgroup_rstat_cpu __percpu *rstat_cpu;
+ * };
+ *
+ * struct cgroup {
+ * struct cgroup_subsys_state self;
* ...
* };
*/
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
{
- g = (__u64)cgrp->rstat_cpu->updated_children;
+ g = (__u64)cgrp->self.rstat_cpu->updated_children;
return 0;
}
SEC("tp_btf/cgroup_mkdir")
int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
{
- struct cgroup_rstat_cpu *rstat;
+ struct css_rstat_cpu *rstat;
__u32 cpu;
cpu = bpf_get_smp_processor_id();
- rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+ rstat = (struct css_rstat_cpu *)bpf_per_cpu_ptr(
+ cgrp->self.rstat_cpu, cpu);
if (rstat) {
/* READ_ONCE */
- *(volatile int *)rstat;
+ *(volatile long *)rstat;
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
index 56c764df8196..5d6fc7f01ebb 100644
--- a/tools/testing/selftests/bpf/progs/cb_refs.c
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -2,7 +2,7 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
struct map_value {
struct prog_test_ref_kfunc __kptr *ptr;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index c74362854948..ff189a736ad8 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -37,8 +37,9 @@ struct {
__type(value, struct attach_counter);
} attach_counters SEC(".maps");
-extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
-extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+extern void css_rstat_updated(
+ struct cgroup_subsys_state *css, int cpu) __ksym;
+extern void css_rstat_flush(struct cgroup_subsys_state *css) __ksym;
static uint64_t cgroup_id(struct cgroup *cgrp)
{
@@ -75,7 +76,7 @@ int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
else if (create_percpu_attach_counter(cg_id, 1))
return 0;
- cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+ css_rstat_updated(&dst_cgrp->self, bpf_get_smp_processor_id());
return 0;
}
@@ -141,7 +142,7 @@ int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
return 1;
/* Flush the stats to make sure we get the most updated numbers */
- cgroup_rstat_flush(cgrp);
+ css_rstat_flush(&cgrp->self);
total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
if (!total_counter) {
diff --git a/tools/testing/selftests/bpf/progs/cgroup_mprog.c b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
new file mode 100644
index 000000000000..6a0ea02c4de2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_mprog.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("cgroup/getsockopt")
+int getsockopt_1(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_2(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_3(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int getsockopt_4(struct bpf_sockopt *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_preorder.c b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
new file mode 100644
index 000000000000..4ef6202baa0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_preorder.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int idx;
+__u8 result[4];
+
+SEC("cgroup/getsockopt")
+int child(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 1;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int child_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 2;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 3;
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int parent_2(struct bpf_sockopt *ctx)
+{
+ if (idx < 4)
+ result[idx++] = 4;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
new file mode 100644
index 000000000000..88e13e17ec9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_read_xattr.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+char value[16];
+
+static __always_inline void read_xattr(struct cgroup *cgroup)
+{
+ struct bpf_dynptr value_ptr;
+
+ bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+ bpf_cgroup_read_xattr(cgroup, "user.bpf_test",
+ &value_ptr);
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(trusted_cgroup_ptr_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_css_iter_non_sleepable)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
+int BPF_PROG(use_css_iter_sleepable_missing_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("lsm.s/socket_connect")
+__success
+int BPF_PROG(use_css_iter_sleepable_with_rcu_lock)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css;
+ struct cgroup *cgrp;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ goto out;
+
+ bpf_for_each(css, css, &cgrp->self, BPF_CGROUP_ITER_ANCESTORS_UP)
+ read_xattr(css->cgroup);
+
+ bpf_cgroup_release(cgrp);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("lsm/socket_connect")
+__success
+int BPF_PROG(use_bpf_cgroup_ancestor)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int BPF_PROG(cgroup_skb)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp, *ancestor;
+
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp)
+ return 0;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, 1);
+ if (!ancestor)
+ goto out;
+
+ read_xattr(cgrp);
+ bpf_cgroup_release(ancestor);
+out:
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c
new file mode 100644
index 000000000000..e32b07d802bb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_direct_packet_access.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 data_end;
+
+SEC("cgroup_skb/ingress")
+int direct_packet_access(struct __sk_buff *skb)
+{
+ data_end = skb->data_end;
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
index 5354455a01be..02d8f160ca0e 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -221,3 +221,15 @@ int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
return 0;
}
+
+SEC("syscall")
+int test_cgrp_from_id_ns(void *ctx)
+{
+ struct cgroup *cg;
+
+ cg = bpf_cgroup_from_id(1);
+ if (!cg)
+ return 42;
+ bpf_cgroup_release(cg);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/compute_live_registers.c b/tools/testing/selftests/bpf/progs/compute_live_registers.c
new file mode 100644
index 000000000000..6884ab99a421
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/compute_live_registers.c
@@ -0,0 +1,440 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_arena_common.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg(" 0: .......... (b7) r0 = 42")
+__msg(" 1: 0......... (bf) r1 = r0")
+__msg(" 2: .1........ (bf) r2 = r1")
+__msg(" 3: ..2....... (bf) r3 = r2")
+__msg(" 4: ...3...... (bf) r4 = r3")
+__msg(" 5: ....4..... (bf) r5 = r4")
+__msg(" 6: .....5.... (bf) r6 = r5")
+__msg(" 7: ......6... (bf) r7 = r6")
+__msg(" 8: .......7.. (bf) r8 = r7")
+__msg(" 9: ........8. (bf) r9 = r8")
+__msg("10: .........9 (bf) r0 = r9")
+__msg("11: 0......... (95) exit")
+__naked void assign_chain(void)
+{
+ asm volatile (
+ "r0 = 42;"
+ "r1 = r0;"
+ "r2 = r1;"
+ "r3 = r2;"
+ "r4 = r3;"
+ "r5 = r4;"
+ "r6 = r5;"
+ "r7 = r6;"
+ "r8 = r7;"
+ "r9 = r8;"
+ "r0 = r9;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 7")
+__msg("1: .1........ (07) r1 += 7")
+__msg("2: .......... (b7) r2 = 7")
+__msg("3: ..2....... (b7) r3 = 42")
+__msg("4: ..23...... (0f) r2 += r3")
+__msg("5: .......... (b7) r0 = 0")
+__msg("6: 0......... (95) exit")
+__naked void arithmetics(void)
+{
+ asm volatile (
+ "r1 = 7;"
+ "r1 += 7;"
+ "r2 = 7;"
+ "r3 = 42;"
+ "r2 += r3;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#ifdef CAN_USE_BPF_ST
+SEC("socket")
+__log_level(2)
+__msg(" 1: .1........ (07) r1 += -8")
+__msg(" 2: .1........ (7a) *(u64 *)(r1 +0) = 7")
+__msg(" 3: .1........ (b7) r2 = 42")
+__msg(" 4: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 5: .12....... (7b) *(u64 *)(r1 +0) = r2")
+__msg(" 6: .......... (b7) r0 = 0")
+__naked void store(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "*(u64 *)(r1 +0) = 7;"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("1: ....4..... (07) r4 += -8")
+__msg("2: ....4..... (79) r5 = *(u64 *)(r4 +0)")
+__msg("3: ....45.... (07) r4 += -8")
+__naked void load(void)
+{
+ asm volatile (
+ "r4 = r10;"
+ "r4 += -8;"
+ "r5 = *(u64 *)(r4 +0);"
+ "r4 += -8;"
+ "r0 = r5;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .1........ (61) r2 = *(u32 *)(r1 +0)")
+__msg("1: ..2....... (d4) r2 = le64 r2")
+__msg("2: ..2....... (bf) r0 = r2")
+__naked void endian(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 +0);"
+ "r2 = le64 r2;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg(" 8: 0......... (b7) r1 = 1")
+__msg(" 9: 01........ (db) r1 = atomic64_fetch_add((u64 *)(r0 +0), r1)")
+__msg("10: 01........ (c3) lock *(u32 *)(r0 +0) += r1")
+__msg("11: 01........ (db) r1 = atomic64_xchg((u64 *)(r0 +0), r1)")
+__msg("12: 01........ (bf) r2 = r0")
+__msg("13: .12....... (bf) r0 = r1")
+__msg("14: 012....... (db) r0 = atomic64_cmpxchg((u64 *)(r2 +0), r0, r1)")
+__naked void atomic(void)
+{
+ asm volatile (
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = 0;"
+ "*(u64 *)(r2 +0) = r1;"
+ "r1 = %[test_map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto 1f;"
+ "r1 = 1;"
+ "r1 = atomic_fetch_add((u64 *)(r0 +0), r1);"
+ ".8byte %[add_nofetch];" /* same as "lock *(u32 *)(r0 +0) += r1;" */
+ "r1 = xchg_64(r0 + 0, r1);"
+ "r2 = r0;"
+ "r0 = r1;"
+ "r0 = cmpxchg_64(r2 + 0, r0, r1);"
+ "1: exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(test_map),
+ __imm_insn(add_nofetch, BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__msg("2: .12....... (db) store_release((u64 *)(r2 -8), r1)")
+__msg("3: .......... (bf) r3 = r10")
+__msg("4: ...3...... (db) r4 = load_acquire((u64 *)(r3 -8))")
+__naked void atomic_load_acq_store_rel(void)
+{
+ asm volatile (
+ "r1 = 42;"
+ "r2 = r10;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r2 - 8), r1); */
+ "r3 = r10;"
+ ".8byte %[load_acquire_insn];" /* r4 = load_acquire((u64 *)(r3 + 0)); */
+ "r0 = r4;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_4, BPF_REG_3, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__log_level(2)
+__msg("4: .12....7.. (85) call bpf_trace_printk#6")
+__msg("5: 0......7.. (0f) r0 += r7")
+__naked void regular_call(void)
+{
+ asm volatile (
+ "r7 = 1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 1;"
+ "call %[bpf_trace_printk];"
+ "r0 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (25) if r1 > 0x7 goto pc+1")
+__msg("3: ..2....... (bf) r0 = r2")
+__naked void if1(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 > 0x7 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("3: 0123...... (2d) if r1 > r3 goto pc+1")
+__msg("4: ..2....... (bf) r0 = r2")
+__naked void if2(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "r3 = 7;"
+ "if r1 > r3 goto +1;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Verifier misses that r2 is alive if jset is not handled properly */
+SEC("socket")
+__log_level(2)
+__msg("2: 012....... (45) if r1 & 0x7 goto pc+1")
+__naked void if3_jset_bug(void)
+{
+ asm volatile (
+ "r0 = 1;"
+ "r2 = 2;"
+ "if r1 & 0x7 goto +1;"
+ "exit;"
+ "r0 = r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 0")
+__msg("1: .1........ (b7) r2 = 7")
+__msg("2: .12....... (25) if r1 > 0x7 goto pc+4")
+__msg("3: .12....... (07) r1 += 1")
+__msg("4: .12....... (27) r2 *= 2")
+__msg("5: .12....... (05) goto pc+0")
+__msg("6: .12....... (05) goto pc-5")
+__msg("7: .......... (b7) r0 = 0")
+__msg("8: 0......... (95) exit")
+__naked void loop(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "r2 = 7;"
+ "if r1 > 0x7 goto +4;"
+ "r1 += 1;"
+ "r2 *= 2;"
+ "goto +0;"
+ "goto -5;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+#ifdef CAN_USE_GOTOL
+SEC("socket")
+__log_level(2)
+__msg("2: .123...... (25) if r1 > 0x7 goto pc+2")
+__msg("3: ..2....... (bf) r0 = r2")
+__msg("4: 0......... (06) gotol pc+1")
+__msg("5: ...3...... (bf) r0 = r3")
+__msg("6: 0......... (95) exit")
+__naked void gotol(void)
+{
+ asm volatile (
+ "r2 = 42;"
+ "r3 = 24;"
+ "if r1 > 0x7 goto +2;"
+ "r0 = r2;"
+ "gotol +1;"
+ "r0 = r3;"
+ "exit;"
+ :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+#endif
+
+SEC("socket")
+__log_level(2)
+__msg("0: .......... (b7) r1 = 1")
+__msg("1: .1........ (e5) may_goto pc+1")
+__msg("2: .......... (05) goto pc-3")
+__msg("3: .1........ (bf) r0 = r1")
+__msg("4: 0......... (95) exit")
+__naked void may_goto(void)
+{
+ asm volatile (
+ "1: r1 = 1;"
+ ".8byte %[may_goto];"
+ "goto 1b;"
+ "r0 = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("1: 0......... (18) r2 = 0x7")
+__msg("3: 0.2....... (0f) r0 += r2")
+__naked void ldimm64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 0x7 ll;"
+ "r0 += r2;"
+ "exit;"
+ :
+ :: __clobber_all);
+}
+
+/* No rules specific for LD_ABS/LD_IND, default behaviour kicks in */
+SEC("socket")
+__log_level(2)
+__msg("2: 0123456789 (30) r0 = *(u8 *)skb[42]")
+__msg("3: 012.456789 (0f) r7 += r0")
+__msg("4: 012.456789 (b7) r3 = 42")
+__msg("5: 0123456789 (50) r0 = *(u8 *)skb[r3 + 0]")
+__msg("6: 0......7.. (0f) r7 += r0")
+__naked void ldabs(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r7 = 0;"
+ "r0 = *(u8 *)skb[42];"
+ "r7 += r0;"
+ "r3 = 42;"
+ ".8byte %[ld_ind];" /* same as "r0 = *(u8 *)skb[r3];" */
+ "r7 += r0;"
+ "r0 = r7;"
+ "exit;"
+ :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_B, BPF_REG_3, 0))
+ : __clobber_all);
+}
+
+
+#ifdef __BPF_FEATURE_ADDR_SPACE_CAST
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__log_level(2)
+__msg(" 6: .12345.... (85) call bpf_arena_alloc_pages")
+__msg(" 7: 0......... (bf) r1 = addr_space_cast(r0, 0, 1)")
+__msg(" 8: .1........ (b7) r2 = 42")
+__naked void addr_space_cast(void)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = 0;"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r1 = addr_space_cast(r0, 0, 1);"
+ "r2 = 42;"
+ "*(u64 *)(r1 +0) = r2;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena)
+ : __clobber_all);
+}
+#endif
+
+static __used __naked int aux1(void)
+{
+ asm volatile (
+ "r0 = r1;"
+ "r0 += r2;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("0: ....45.... (b7) r1 = 1")
+__msg("1: .1..45.... (b7) r2 = 2")
+__msg("2: .12.45.... (b7) r3 = 3")
+/* Conservative liveness for subprog parameters. */
+__msg("3: .12345.... (85) call pc+2")
+__msg("4: .......... (b7) r0 = 0")
+__msg("5: 0......... (95) exit")
+__msg("6: .12....... (bf) r0 = r1")
+__msg("7: 0.2....... (0f) r0 += r2")
+/* Conservative liveness for subprog return value. */
+__msg("8: 0......... (95) exit")
+__naked void subprog1(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "r2 = 2;"
+ "r3 = 3;"
+ "call aux1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
index d3f4c5e4fb69..a3819a5d09c8 100644
--- a/tools/testing/selftests/bpf/progs/connect4_dropper.c
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -13,12 +13,14 @@
#define VERDICT_REJECT 0
#define VERDICT_PROCEED 1
+int port;
+
SEC("cgroup/connect4")
int connect_v4_dropper(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_STREAM)
return VERDICT_PROCEED;
- if (ctx->user_port == bpf_htons(60120))
+ if (ctx->user_port == bpf_htons(port))
return VERDICT_REJECT;
return VERDICT_PROCEED;
}
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 9e9ebf27b878..9d158cfad981 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -34,6 +34,9 @@
#define SOL_TCP 6
#endif
+const char reno[] = "reno";
+const char cubic[] = "cubic";
+
__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
@@ -50,35 +53,27 @@ int do_bind(struct bpf_sock_addr *ctx)
}
static __inline int verify_cc(struct bpf_sock_addr *ctx,
- char expected[TCP_CA_NAME_MAX])
+ const char expected[])
{
char buf[TCP_CA_NAME_MAX];
- int i;
if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 1;
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (buf[i] != expected[i])
- return 1;
- if (buf[i] == 0)
- break;
- }
+ if (bpf_strncmp(buf, TCP_CA_NAME_MAX, expected))
+ return 1;
return 0;
}
static __inline int set_cc(struct bpf_sock_addr *ctx)
{
- char reno[TCP_CA_NAME_MAX] = "reno";
- char cubic[TCP_CA_NAME_MAX] = "cubic";
-
- if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)reno, sizeof(reno)))
return 1;
if (verify_cc(ctx, reno))
return 1;
- if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, (void *)cubic, sizeof(cubic)))
return 1;
if (verify_cc(ctx, cubic))
return 1;
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index fd8e1b4c6762..5760ae015e09 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -347,6 +347,7 @@ struct core_reloc_nesting___err_too_deep {
*/
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -455,6 +456,15 @@ struct core_reloc_arrays___err_bad_zero_sz_arr {
struct core_reloc_arrays_substruct d[1][2];
};
+struct core_reloc_arrays___err_bad_signed_arr_elem_sz {
+ /* int -> short (signed!): not supported case */
+ short a[5];
+ char b[2][3][4];
+ struct core_reloc_arrays_substruct c[3];
+ struct core_reloc_arrays_substruct d[1][2];
+ struct core_reloc_arrays_substruct f[][2];
+};
+
/*
* PRIMITIVES
*/
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index 4ece7873ba60..86085b79f5ca 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -61,6 +61,7 @@ u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
const struct cpumask *src2) __ksym __weak;
u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+int bpf_cpumask_populate(struct cpumask *cpumask, void *src, size_t src__sz) __ksym __weak;
void bpf_rcu_read_lock(void) __ksym __weak;
void bpf_rcu_read_unlock(void) __ksym __weak;
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index b40b52548ffb..8a2fd596c8a3 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -222,3 +222,41 @@ int BPF_PROG(test_invalid_nested_array, struct task_struct *task, u64 clone_flag
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("type=scalar expected=fp")
+int BPF_PROG(test_populate_invalid_destination, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *invalid = (struct bpf_cpumask *)0x123456;
+ u64 bits;
+ int ret;
+
+ ret = bpf_cpumask_populate((struct cpumask *)invalid, &bits, sizeof(bits));
+ if (!ret)
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("leads to invalid memory access")
+int BPF_PROG(test_populate_invalid_source, struct task_struct *task, u64 clone_flags)
+{
+ void *garbage = (void *)0x123456;
+ struct bpf_cpumask *local;
+ int ret;
+
+ local = create_cpumask();
+ if (!local) {
+ err = 1;
+ return 0;
+ }
+
+ ret = bpf_cpumask_populate((struct cpumask *)local, garbage, 8);
+ if (!ret)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 80ee469b0b60..0e04c31b91c0 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -749,7 +749,6 @@ out:
}
SEC("tp_btf/task_newtask")
-__success
int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *mask1, *mask2;
@@ -770,3 +769,122 @@ free_masks_return:
bpf_cpumask_release(mask2);
return 0;
}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_small_mask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+ u8 toofewbits;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ /* The kfunc should prevent this operation */
+ ret = bpf_cpumask_populate((struct cpumask *)local, &toofewbits, sizeof(toofewbits));
+ if (ret != -EACCES)
+ err = 2;
+
+ bpf_cpumask_release(local);
+
+ return 0;
+}
+
+/* Mask is guaranteed to be large enough for bpf_cpumask_t. */
+#define CPUMASK_TEST_MASKLEN (sizeof(cpumask_t))
+
+/* Add an extra word for the test_populate_reject_unaligned test. */
+u64 bits[CPUMASK_TEST_MASKLEN / 8 + 1];
+extern bool CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS __kconfig __weak;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate_reject_unaligned, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ char *src;
+ int ret;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Skip if unaligned accesses are fine for this arch. */
+ if (CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ return 0;
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Misalign the source array by a byte. */
+ src = &((char *)bits)[1];
+
+ ret = bpf_cpumask_populate((struct cpumask *)mask, src, CPUMASK_TEST_MASKLEN);
+ if (ret != -EINVAL)
+ err = 2;
+
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_populate, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask;
+ bool bit;
+ int ret;
+ int i;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Set only odd bits. */
+ __builtin_memset(bits, 0xaa, CPUMASK_TEST_MASKLEN);
+
+ mask = bpf_cpumask_create();
+ if (!mask) {
+ err = 1;
+ return 0;
+ }
+
+ /* Pass the entire bits array, the kfunc will only copy the valid bits. */
+ ret = bpf_cpumask_populate((struct cpumask *)mask, bits, CPUMASK_TEST_MASKLEN);
+ if (ret) {
+ err = 2;
+ goto out;
+ }
+
+ /*
+ * Test is there to appease the verifier. We cannot directly
+ * access NR_CPUS, the upper bound for nr_cpus, so we infer
+ * it from the size of cpumask_t.
+ */
+ if (nr_cpus < 0 || nr_cpus >= CPUMASK_TEST_MASKLEN * 8) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_for(i, 0, nr_cpus) {
+ /* Odd-numbered bits should be set, even ones unset. */
+ bit = bpf_cpumask_test_cpu(i, (const struct cpumask *)mask);
+ if (bit == (i % 2 != 0))
+ continue;
+
+ err = 4;
+ break;
+ }
+
+out:
+ bpf_cpumask_release(mask);
+
+ return 0;
+}
+
+#undef CPUMASK_TEST_MASKLEN
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
index 645be6cddf36..dfd8a258f14a 100644
--- a/tools/testing/selftests/bpf/progs/crypto_sanity.c
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -14,7 +14,7 @@ unsigned char key[256] = {};
u16 udp_test_port = 7777;
u32 authsize, key_len;
char algo[128] = {};
-char dst[16] = {};
+char dst[16] = {}, dst_bad[8] = {};
int status;
static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
@@ -59,10 +59,9 @@ int skb_crypto_setup(void *ctx)
.authsize = authsize,
};
struct bpf_crypto_ctx *cctx;
- int err = 0;
+ int err;
status = 0;
-
if (key_len > 256) {
status = -EINVAL;
return 0;
@@ -70,8 +69,8 @@ int skb_crypto_setup(void *ctx)
__builtin_memcpy(&params.algo, algo, sizeof(algo));
__builtin_memcpy(&params.key, key, sizeof(key));
- cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
if (!cctx) {
status = err;
return 0;
@@ -80,7 +79,6 @@ int skb_crypto_setup(void *ctx)
err = crypto_ctx_insert(cctx);
if (err && err != -EEXIST)
status = err;
-
return 0;
}
@@ -92,6 +90,7 @@ int decrypt_sanity(struct __sk_buff *skb)
struct bpf_dynptr psrc, pdst;
int err;
+ status = 0;
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -110,13 +109,23 @@ int decrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Also check the bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
@@ -129,7 +138,6 @@ int encrypt_sanity(struct __sk_buff *skb)
int err;
status = 0;
-
err = skb_dynptr_validate(skb, &psrc);
if (err < 0) {
status = err;
@@ -148,13 +156,23 @@ int encrypt_sanity(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- /* dst is a global variable to make testing part easier to check. In real
- * production code, a percpu map should be used to store the result.
+ /* Also check the bad case where the dst buffer is smaller than the
+ * skb's linear section.
+ */
+ bpf_dynptr_from_mem(dst_bad, sizeof(dst_bad), 0, &pdst);
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ if (!status)
+ status = -EIO;
+ if (status != -EINVAL)
+ goto err;
+
+ /* dst is a global variable to make testing part easier to check.
+ * In real production code, a percpu map should be used to store
+ * the result.
*/
bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
-
status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
-
+err:
return TC_ACT_SHOT;
}
diff --git a/tools/testing/selftests/bpf/progs/dmabuf_iter.c b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
new file mode 100644
index 000000000000..13cdb11fdeb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dmabuf_iter.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+/* From uapi/linux/dma-buf.h */
+#define DMA_BUF_NAME_LEN 32
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, DMA_BUF_NAME_LEN);
+ __type(value, bool);
+ __uint(max_entries, 5);
+} testbuf_hash SEC(".maps");
+
+/*
+ * Fields output by this iterator are delimited by newlines. Convert any
+ * newlines in user-provided printed strings to spaces.
+ */
+static void sanitize_string(char *src, size_t size)
+{
+ for (char *c = src; (size_t)(c - src) < size && *c; ++c)
+ if (*c == '\n')
+ *c = ' ';
+}
+
+SEC("iter/dmabuf")
+int dmabuf_collector(struct bpf_iter__dmabuf *ctx)
+{
+ const struct dma_buf *dmabuf = ctx->dmabuf;
+ struct seq_file *seq = ctx->meta->seq;
+ unsigned long inode = 0;
+ size_t size;
+ const char *pname, *exporter;
+ char name[DMA_BUF_NAME_LEN] = {'\0'};
+
+ if (!dmabuf)
+ return 0;
+
+ if (BPF_CORE_READ_INTO(&inode, dmabuf, file, f_inode, i_ino) ||
+ bpf_core_read(&size, sizeof(size), &dmabuf->size) ||
+ bpf_core_read(&pname, sizeof(pname), &dmabuf->name) ||
+ bpf_core_read(&exporter, sizeof(exporter), &dmabuf->exp_name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (pname) {
+ if (bpf_probe_read_kernel(name, sizeof(name), pname))
+ return 1;
+
+ /* Name strings can be provided by userspace */
+ sanitize_string(name, sizeof(name));
+ }
+
+ BPF_SEQ_PRINTF(seq, "%lu\n%llu\n%s\n%s\n", inode, size, name, exporter);
+ return 0;
+}
+
+SEC("syscall")
+int iter_dmabuf_for_each(const void *ctx)
+{
+ struct dma_buf *d;
+
+ bpf_for_each(dmabuf, d) {
+ char name[DMA_BUF_NAME_LEN];
+ const char *pname;
+ bool *found;
+ long len;
+ int i;
+
+ if (bpf_core_read(&pname, sizeof(pname), &d->name))
+ return 1;
+
+ /* Buffers are not required to be named */
+ if (!pname)
+ continue;
+
+ len = bpf_probe_read_kernel_str(name, sizeof(name), pname);
+ if (len < 0)
+ return 1;
+
+ /*
+ * The entire name buffer is used as a map key.
+ * Zeroize any uninitialized trailing bytes after the NUL.
+ */
+ bpf_for(i, len, DMA_BUF_NAME_LEN)
+ name[i] = 0;
+
+ found = bpf_map_lookup_elem(&testbuf_hash, name);
+ if (found) {
+ bool t = true;
+
+ bpf_map_update_elem(&testbuf_hash, name, &t, BPF_EXIST);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 8f36c9de7591..dda6a8dada82 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -149,7 +149,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)
/* A dynptr can't be used after it has been invalidated */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
int use_after_invalid(void *ctx)
{
struct bpf_dynptr ptr;
@@ -192,7 +192,7 @@ done:
/* Can't add a dynptr to a map */
SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
int add_dynptr_to_map1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -210,7 +210,7 @@ int add_dynptr_to_map1(void *ctx)
/* Can't add a struct with an embedded dynptr to a map */
SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
int add_dynptr_to_map2(void *ctx)
{
struct test_info x;
@@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
return SK_PASS;
}
+/* A metadata slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *(md + 1) = 42;
+
+ return SK_PASS;
+}
+
SEC("?raw_tp")
__failure __msg("value is outside of the allowed memory range")
int data_slice_out_of_bounds_map_value(void *ctx)
@@ -398,7 +418,7 @@ int data_slice_missing_null_check2(void *ctx)
* dynptr argument
*/
SEC("?raw_tp")
-__failure __msg("invalid indirect read from stack")
+__failure __msg("invalid read from stack")
int invalid_helper1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -428,7 +448,7 @@ int invalid_helper2(void *ctx)
/* A bpf_dynptr is invalidated if it's been written into */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
int invalid_write1(void *ctx)
{
struct bpf_dynptr ptr;
@@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb)
return SK_PASS;
}
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem")
+int skb_meta_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?tc")
__failure __msg("invalid mem access 'scalar'")
@@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb)
return SK_PASS;
}
+/* Read-only skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *d;
+
+ return SK_PASS;
+}
+
+/* Read-write skb data slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *d;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d));
+ if (!d)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *d = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb data slice */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&data, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
+/* Read-only skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ val = *md;
+
+ return SK_PASS;
+}
+
+/* Read-write skb metadata slice is invalidated on write to skb metadata */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+
+ md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ bpf_dynptr_write(&meta, 0, "x", 1, 0);
+
+ /* this should fail */
+ *md = 42;
+
+ return SK_PASS;
+}
+
/* The read-only data slice is invalidated whenever a helper changes packet data */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
@@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx)
return 0;
}
+/* Only supported prog types can create skb_meta-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed")
+int skb_meta_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr meta;
+
+ /* this should fail */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+
+ return 0;
+}
+
SEC("fentry/skb_tx_error")
__failure __msg("must be referenced or trusted")
int BPF_PROG(skb_invalid_ctx_fentry, void *skb)
@@ -1407,7 +1642,7 @@ int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
/* bpf_dynptr_adjust can only be called on initialized dynptrs */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_adjust_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@@ -1420,7 +1655,7 @@ int dynptr_adjust_invalid(void *ctx)
/* bpf_dynptr_is_null can only be called on initialized dynptrs */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_is_null_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@@ -1433,7 +1668,7 @@ int dynptr_is_null_invalid(void *ctx)
/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_is_rdonly_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@@ -1446,7 +1681,7 @@ int dynptr_is_rdonly_invalid(void *ctx)
/* bpf_dynptr_size can only be called on initialized dynptrs */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
int dynptr_size_invalid(void *ctx)
{
struct bpf_dynptr ptr = {};
@@ -1459,7 +1694,7 @@ int dynptr_size_invalid(void *ctx)
/* Only initialized dynptrs can be cloned */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #1")
+__failure __msg("Expected an initialized dynptr as arg #0")
int clone_invalid1(void *ctx)
{
struct bpf_dynptr ptr1 = {};
@@ -1493,7 +1728,7 @@ int clone_invalid2(struct xdp_md *xdp)
/* Invalidating a dynptr should invalidate its clones */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
int clone_invalidate1(void *ctx)
{
struct bpf_dynptr clone;
@@ -1514,7 +1749,7 @@ int clone_invalidate1(void *ctx)
/* Invalidating a dynptr should invalidate its parent */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
int clone_invalidate2(void *ctx)
{
struct bpf_dynptr ptr;
@@ -1535,7 +1770,7 @@ int clone_invalidate2(void *ctx)
/* Invalidating a dynptr should invalidate its siblings */
SEC("?raw_tp")
-__failure __msg("Expected an initialized dynptr as arg #3")
+__failure __msg("Expected an initialized dynptr as arg #2")
int clone_invalidate3(void *ctx)
{
struct bpf_dynptr ptr;
@@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb)
return 0;
}
+/* A skb clone's metadata slice becomes invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_meta(struct __sk_buff *skb)
+{
+ struct bpf_dynptr clone, meta;
+ __u8 *md;
+
+ bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ bpf_dynptr_clone(&meta, &clone);
+ md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md));
+ if (!md)
+ return SK_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *md = 42;
+
+ return 0;
+}
+
/* A xdp clone's data slices should be invalid anytime packet data changes */
SEC("?xdp")
__failure __msg("invalid mem access 'scalar'")
@@ -1723,7 +1981,7 @@ __noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr)
}
SEC("?raw_tp")
-__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
+__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
int test_dynptr_reg_type(void *ctx)
{
struct task_struct *current = NULL;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
index bfcc85686cf0..e0d672d93adf 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_success.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -1,20 +1,21 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2022 Facebook */
+#include <vmlinux.h>
#include <string.h>
#include <stdbool.h>
-#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "bpf_kfuncs.h"
#include "errno.h"
+#define PAGE_SIZE_64K 65536
+
char _license[] SEC("license") = "GPL";
int pid, err, val;
-struct sample {
+struct ringbuf_sample {
int pid;
int seq;
long value;
@@ -121,7 +122,7 @@ int test_dynptr_data(void *ctx)
static int ringbuf_callback(__u32 index, void *data)
{
- struct sample *sample;
+ struct ringbuf_sample *sample;
struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
@@ -138,7 +139,7 @@ SEC("?tp/syscalls/sys_enter_nanosleep")
int test_ringbuf(void *ctx)
{
struct bpf_dynptr ptr;
- struct sample *sample;
+ struct ringbuf_sample *sample;
if (bpf_get_current_pid_tgid() >> 32 != pid)
return 0;
@@ -210,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb)
return 1;
}
+SEC("?tc")
+int test_dynptr_skb_meta_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr meta;
+ __u8 *md;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ err = 2;
+ md = bpf_dynptr_data(&meta, 0, sizeof(*md));
+ if (md)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
+/* Check that skb metadata dynptr ops don't accept any flags. */
+SEC("?tc")
+int test_dynptr_skb_meta_flags(struct __sk_buff *skb)
+{
+ const __u64 INVALID_FLAGS = ~0ULL;
+ struct bpf_dynptr meta;
+ __u8 buf;
+ int ret;
+
+ err = 1;
+ ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 2;
+ ret = bpf_dynptr_from_skb_meta(skb, 0, &meta);
+ if (ret)
+ return 1;
+
+ err = 3;
+ ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 4;
+ ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS);
+ if (ret != -EINVAL)
+ return 1;
+
+ err = 0;
+ return 1;
+}
+
SEC("tp/syscalls/sys_enter_nanosleep")
int test_adjust(void *ctx)
{
@@ -567,3 +623,515 @@ int BPF_PROG(test_dynptr_skb_tp_btf, void *skb, void *location)
return 1;
}
+
+static inline int bpf_memcmp(const char *a, const char *b, u32 size)
+{
+ int i;
+
+ bpf_for(i, 0, size) {
+ if (a[i] != b[i])
+ return a[i] < b[i] ? -1 : 1;
+ }
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_copy(void *ctx)
+{
+ char data[] = "hello there, world!!";
+ char buf[32] = {'\0'};
+ __u32 sz = sizeof(data);
+ struct bpf_dynptr src, dst;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &src);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sz, 0, &dst);
+
+ /* Test basic case of copying contiguous memory backed dynptrs */
+ err = bpf_dynptr_write(&src, 0, data, sz, 0);
+ err = err ?: bpf_dynptr_copy(&dst, 0, &src, 0, sz);
+ err = err ?: bpf_dynptr_read(buf, sz, &dst, 0, 0);
+ err = err ?: bpf_memcmp(data, buf, sz);
+
+ /* Test that offsets are handled correctly */
+ err = err ?: bpf_dynptr_copy(&dst, 3, &src, 5, sz - 5);
+ err = err ?: bpf_dynptr_read(buf, sz - 5, &dst, 3, 0);
+ err = err ?: bpf_memcmp(data + 5, buf, sz - 5);
+
+ bpf_ringbuf_discard_dynptr(&src, 0);
+ bpf_ringbuf_discard_dynptr(&dst, 0);
+ return 0;
+}
+
+SEC("xdp")
+int test_dynptr_copy_xdp(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr_buf, ptr_xdp;
+ char data[] = "qwertyuiopasdfghjkl";
+ char buf[32] = {'\0'};
+ __u32 len = sizeof(data), xdp_data_size;
+ int i, chunks = 200;
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ xdp_data_size = bpf_dynptr_size(&ptr_xdp);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, len * chunks, 0, &ptr_buf);
+
+ /* Destination dynptr is backed by non-contiguous memory */
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, data, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_xdp, 0, &ptr_buf, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Source dynptr is backed by non-contiguous memory */
+ __builtin_memset(buf, 0, sizeof(buf));
+ bpf_for(i, 0, chunks) {
+ err = bpf_dynptr_write(&ptr_buf, i * len, buf, len, 0);
+ if (err)
+ goto out;
+ }
+
+ err = bpf_dynptr_copy(&ptr_buf, 0, &ptr_xdp, 0, len * chunks);
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_buf, i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ /* Both source and destination dynptrs are backed by non-contiguous memory */
+ err = bpf_dynptr_copy(&ptr_xdp, 2, &ptr_xdp, len, len * (chunks - 1));
+ if (err)
+ goto out;
+
+ bpf_for(i, 0, chunks - 1) {
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = bpf_dynptr_read(&buf, len, &ptr_xdp, 2 + i * len, 0);
+ if (err)
+ goto out;
+ if (bpf_memcmp(data, buf, len) != 0)
+ goto out;
+ }
+
+ if (bpf_dynptr_copy(&ptr_xdp, xdp_data_size - 3000, &ptr_xdp, 0, len * chunks) != -E2BIG)
+ err = 1;
+
+out:
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+ return XDP_DROP;
+}
+
+char memset_zero_data[] = "data to be zeroed";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_zero_data);
+ char zeroes[32] = {'\0'};
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, 0);
+ err = err ?: bpf_memcmp(zeroes, memset_zero_data, data_sz);
+
+ return 0;
+}
+
+#define DYNPTR_MEMSET_VAL 42
+
+char memset_notzero_data[] = "data to be overwritten";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_notzero(void *ctx)
+{
+ u32 data_sz = sizeof(memset_notzero_data);
+ struct bpf_dynptr ptr;
+ char expected[32];
+
+ __builtin_memset(expected, DYNPTR_MEMSET_VAL, data_sz);
+
+ err = bpf_dynptr_from_mem(memset_notzero_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, data_sz, DYNPTR_MEMSET_VAL);
+ err = err ?: bpf_memcmp(expected, memset_notzero_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_offset_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_offset(void *ctx)
+{
+ char expected[] = "data to \0\0\0\0eroed partially";
+ __u32 data_sz = sizeof(memset_zero_offset_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_offset_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_memset(&ptr, 8, 4, 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_offset_data, data_sz);
+
+ return 0;
+}
+
+char memset_zero_adjusted_data[] = "data to be zeroed partially";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_zero_adjusted(void *ctx)
+{
+ char expected[] = "data\0\0\0\0be zeroed partially";
+ __u32 data_sz = sizeof(memset_zero_adjusted_data);
+ struct bpf_dynptr ptr;
+
+ err = bpf_dynptr_from_mem(memset_zero_adjusted_data, data_sz, 0, &ptr);
+ err = err ?: bpf_dynptr_adjust(&ptr, 4, 8);
+ err = err ?: bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ err = err ?: bpf_memcmp(expected, memset_zero_adjusted_data, data_sz);
+
+ return 0;
+}
+
+char memset_overflow_data[] = "memset overflow data";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 0, data_sz + 1, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_memset_overflow_offset(void *ctx)
+{
+ __u32 data_sz = sizeof(memset_overflow_data);
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_mem(memset_overflow_data, data_sz, 0, &ptr);
+ ret = bpf_dynptr_memset(&ptr, 1, data_sz, 0);
+ if (ret != -E2BIG)
+ err = 1;
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_memset_readonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ int ret;
+
+ err = bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* cgroup skbs are read only, memset should fail */
+ ret = bpf_dynptr_memset(&ptr, 0, bpf_dynptr_size(&ptr), 0);
+ if (ret != -EINVAL)
+ err = 1;
+
+ return 0;
+}
+
+#define min_t(type, x, y) ({ \
+ type __x = (x); \
+ type __y = (y); \
+ __x < __y ? __x : __y; })
+
+SEC("xdp")
+int test_dynptr_memset_xdp_chunks(struct xdp_md *xdp)
+{
+ u32 data_sz, chunk_sz, offset = 0;
+ const int max_chunks = 200;
+ struct bpf_dynptr ptr_xdp;
+ char expected_buf[32];
+ char buf[32];
+ int i;
+
+ __builtin_memset(expected_buf, DYNPTR_MEMSET_VAL, sizeof(expected_buf));
+
+ /* ptr_xdp is backed by non-contiguous memory */
+ bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ data_sz = bpf_dynptr_size(&ptr_xdp);
+
+ err = bpf_dynptr_memset(&ptr_xdp, 0, data_sz, DYNPTR_MEMSET_VAL);
+ if (err) {
+ /* bpf_dynptr_memset() eventually calls bpf_xdp_pointer(), which
+ * returns -EFAULT when data_sz is greater than 0xffff. With a 64K
+ * page size, data_sz exceeds that limit, so the error is expected:
+ * clear it and return success.
+ */
+ if (data_sz >= PAGE_SIZE_64K)
+ err = 0;
+ goto out;
+ }
+
+ bpf_for(i, 0, max_chunks) {
+ offset = i * sizeof(buf);
+ if (offset >= data_sz)
+ goto out;
+ chunk_sz = min_t(u32, sizeof(buf), data_sz - offset);
+ err = bpf_dynptr_read(&buf, chunk_sz, &ptr_xdp, offset, 0);
+ if (err)
+ goto out;
+ err = bpf_memcmp(buf, expected_buf, sizeof(buf));
+ if (err)
+ goto out;
+ }
+out:
+ return XDP_DROP;
+}
+
+void *user_ptr;
+/* Contains a copy of the data pointed to by user_ptr.
+ * Size 384 so that it does not fit into a single kernel chunk when copying,
+ * yet stays below the maximum BPF stack size (512).
+ */
+char expected_str[384];
+__u32 test_len[7] = {0/* placeholder */, 0, 1, 2, 255, 256, 257};
+
+typedef int (*bpf_read_dynptr_fn_t)(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr);
+
+/* Returns the offset just before the end of the maximum sized xdp fragment.
+ * Any write larger than 32 bytes will be split between 2 fragments.
+ */
+__u32 xdp_near_frag_end_offset(void)
+{
+ const __u32 headroom = 256;
+ const __u32 max_frag_size = __PAGE_SIZE - headroom - sizeof(struct skb_shared_info);
+
+ /* 32 bytes before the approximate end of the fragment */
+ return max_frag_size - 32;
+}
+
+/* Use __always_inline on test_dynptr_probe[_str][_xdp]() and callbacks
+ * of type bpf_read_dynptr_fn_t to prevent the compiler from generating
+ * indirect calls, which would make the program fail to load with an
+ * "unknown opcode" error.
+ */
+static __always_inline void test_dynptr_probe(void *ptr, bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_buf, 0, test_len[i], ptr);
+ if (len > sizeof(buf))
+ break;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_buf, 0, buf, len, 0);
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_str(void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ char buf[sizeof(expected_str)];
+ struct bpf_dynptr ptr_buf;
+ __u32 cnt, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(buf), 0, &ptr_buf);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_buf, 0, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_buf, 0, 0);
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+ }
+ bpf_ringbuf_discard_dynptr(&ptr_buf, 0);
+}
+
+static __always_inline void test_dynptr_probe_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ err = err ?: bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+ if (err || bpf_memcmp(expected_str, buf, len))
+ err = 1;
+ /* Reset buffer and dynptr */
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+static __always_inline void test_dynptr_probe_str_xdp(struct xdp_md *xdp, void *ptr,
+ bpf_read_dynptr_fn_t bpf_read_dynptr_fn)
+{
+ struct bpf_dynptr ptr_xdp;
+ char buf[sizeof(expected_str)];
+ __u32 cnt, off, i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ off = xdp_near_frag_end_offset();
+ err = bpf_dynptr_from_xdp(xdp, 0, &ptr_xdp);
+ if (err)
+ return;
+
+ bpf_for(i, 0, ARRAY_SIZE(test_len)) {
+ __u32 len = test_len[i];
+
+ cnt = bpf_read_dynptr_fn(&ptr_xdp, off, len, ptr);
+ if (cnt != len)
+ err = 1;
+
+ if (len > sizeof(buf))
+ continue;
+ err = err ?: bpf_dynptr_read(&buf, len, &ptr_xdp, off, 0);
+
+ if (!len)
+ continue;
+ if (err || bpf_memcmp(expected_str, buf, len - 1) || buf[len - 1] != '\0')
+ err = 1;
+
+ __builtin_memset(buf, 0, sizeof(buf));
+ err = err ?: bpf_dynptr_write(&ptr_xdp, off, buf, len, 0);
+ }
+}
+
+SEC("xdp")
+int test_probe_read_user_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(user_ptr, bpf_probe_read_user_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, user_ptr, bpf_probe_read_user_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe(expected_str, bpf_probe_read_kernel_dynptr);
+ if (!err)
+ test_dynptr_probe_xdp(xdp, expected_str, bpf_probe_read_kernel_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_user_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(user_ptr, bpf_probe_read_user_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, user_ptr, bpf_probe_read_user_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int test_probe_read_kernel_str_dynptr(struct xdp_md *xdp)
+{
+ test_dynptr_probe_str(expected_str, bpf_probe_read_kernel_str_dynptr);
+ if (!err)
+ test_dynptr_probe_str_xdp(xdp, expected_str, bpf_probe_read_kernel_str_dynptr);
+ return XDP_PASS;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_from_user_dynptr);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_from_user_str_dynptr);
+ return 0;
+}
+
+static int bpf_copy_data_from_user_task(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+static int bpf_copy_data_from_user_task_str(struct bpf_dynptr *dptr, u64 off,
+ u64 size, const void *unsafe_ptr)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return bpf_copy_from_user_task_str_dynptr(dptr, off, size, unsafe_ptr, task);
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_dynptr(void *ctx)
+{
+ test_dynptr_probe(user_ptr, bpf_copy_data_from_user_task);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int test_copy_from_user_task_str_dynptr(void *ctx)
+{
+ test_dynptr_probe_str(user_ptr, bpf_copy_data_from_user_task_str);
+ return 0;
+}
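
For context, a user-space runner would drive the copy-from-user programs above by setting the global pid, user_ptr and expected_str before entering nanosleep, then checking the global err. A minimal sketch, assuming a libbpf skeleton named dynptr_success.skel.h (the skeleton name and field placement are assumptions, not taken from this patch):

#include <unistd.h>
#include <string.h>
#include "dynptr_success.skel.h"	/* assumed skeleton name */

static int run_copy_from_user_dynptr(void)
{
	static char ubuf[384];
	struct dynptr_success *skel;
	int err;

	skel = dynptr_success__open_and_load();
	if (!skel)
		return -1;

	/* Pattern the user buffer and mirror it into expected_str */
	memset(ubuf, 'x', sizeof(ubuf) - 1);
	ubuf[sizeof(ubuf) - 1] = '\0';
	memcpy(skel->bss->expected_str, ubuf, sizeof(ubuf));
	skel->bss->user_ptr = ubuf;
	skel->bss->pid = getpid();

	err = dynptr_success__attach(skel);
	if (!err)
		usleep(1);	/* enter sys_nanosleep and fire the fentry.s progs */

	err = err ?: skel->bss->err;
	dynptr_success__destroy(skel);
	return err;
}
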
diff --git a/tools/testing/selftests/bpf/progs/epilogue_exit.c b/tools/testing/selftests/bpf/progs/epilogue_exit.c
index 33d3a57bee90..35fec7c75bef 100644
--- a/tools/testing/selftests/bpf/progs/epilogue_exit.c
+++ b/tools/testing/selftests/bpf/progs/epilogue_exit.c
@@ -4,8 +4,8 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
index 7275dd594de0..153514691ba4 100644
--- a/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
+++ b/tools/testing/selftests/bpf/progs/epilogue_tailcall.c
@@ -4,8 +4,8 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
index 5e0a1ca96d4e..a01c2736890f 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_assert.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -18,43 +18,43 @@
return *(u64 *)num; \
}
-__msg(": R0_w=0xffffffff80000000")
+__msg(": R0=0xffffffff80000000")
check_assert(s64, ==, eq_int_min, INT_MIN);
-__msg(": R0_w=0x7fffffff")
+__msg(": R0=0x7fffffff")
check_assert(s64, ==, eq_int_max, INT_MAX);
-__msg(": R0_w=0")
+__msg(": R0=0")
check_assert(s64, ==, eq_zero, 0);
-__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000")
+__msg(": R0=0x8000000000000000 R1=0x8000000000000000")
check_assert(s64, ==, eq_llong_min, LLONG_MIN);
-__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff")
+__msg(": R0=0x7fffffffffffffff R1=0x7fffffffffffffff")
check_assert(s64, ==, eq_llong_max, LLONG_MAX);
-__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)")
+__msg(": R0=scalar(id=1,smax=0x7ffffffe)")
check_assert(s64, <, lt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
check_assert(s64, <, lt_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff")
+__msg(": R0=scalar(id=1,smax=0xffffffff7fffffff")
check_assert(s64, <, lt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smax=0x7fffffff)")
+__msg(": R0=scalar(id=1,smax=0x7fffffff)")
check_assert(s64, <=, le_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smax=0)")
+__msg(": R0=scalar(id=1,smax=0)")
check_assert(s64, <=, le_zero, 0);
-__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smax=0xffffffff80000000")
check_assert(s64, <=, le_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >, gt_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000001")
check_assert(s64, >, gt_neg, INT_MIN);
-__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_pos, INT_MAX);
-__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+__msg(": R0=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
check_assert(s64, >=, ge_zero, 0);
-__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000")
+__msg(": R0=scalar(id=1,smin=0xffffffff80000000")
check_assert(s64, >=, ge_neg, INT_MIN);
SEC("?tc")
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
index fe0f3fa5aab6..8a0fdff89927 100644
--- a/tools/testing/selftests/bpf/progs/exceptions_fail.c
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -131,7 +131,7 @@ int reject_subprog_with_lock(void *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region")
int reject_with_rcu_read_lock(void *ctx)
{
bpf_rcu_read_lock();
@@ -147,7 +147,7 @@ __noinline static int throwing_subprog(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_rcu_read_lock-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_rcu_read_lock-ed region")
int reject_subprog_with_rcu_read_lock(void *ctx)
{
bpf_rcu_read_lock();
diff --git a/tools/testing/selftests/bpf/progs/fd_htab_lookup.c b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
new file mode 100644
index 000000000000..a4a9e1db626f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fd_htab_lookup.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct inner_map_type);
+} outer_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
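
For reference, the interesting user-space side of an fd-based hash-of-maps test like this is installing freshly created inner maps into outer_map by fd while lookups run in the kernel. A minimal sketch using stock libbpf calls (the helper itself is illustrative):

#include <errno.h>
#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int add_inner_map(struct bpf_object *obj, int key)
{
	struct bpf_map *outer;
	int inner_fd, err;

	outer = bpf_object__find_map_by_name(obj, "outer_map");
	if (!outer)
		return -ENOENT;

	inner_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL,
				  sizeof(int), sizeof(int), 1, NULL);
	if (inner_fd < 0)
		return inner_fd;

	/* For *_OF_MAPS map types the update value is the inner map's fd */
	err = bpf_map_update_elem(bpf_map__fd(outer), &key, &inner_fd, BPF_ANY);
	close(inner_fd);	/* the outer map now holds its own reference */
	return err;
}
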
diff --git a/tools/testing/selftests/bpf/progs/file_reader.c b/tools/testing/selftests/bpf/progs/file_reader.c
new file mode 100644
index 000000000000..4d756b623557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct elem {
+ struct file *file;
+ struct bpf_task_work tw;
+};
+
+char user_buf[256000];
+char tmp_buf[256000];
+
+int pid = 0;
+int err, run_success = 0;
+
+static int validate_file_read(struct file *file);
+static int task_work_callback(struct bpf_map *map, void *key, void *value);
+
+SEC("lsm/file_open")
+int on_open_expect_fault(void *c)
+{
+ struct bpf_dynptr dynptr;
+ struct file *file;
+ int local_err = 1;
+ __u32 user_buf_sz = sizeof(user_buf);
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ file = bpf_get_task_exe_file(bpf_get_current_task_btf());
+ if (!file)
+ return 0;
+
+ if (bpf_dynptr_from_file(file, 0, &dynptr))
+ goto out;
+
+ local_err = bpf_dynptr_read(tmp_buf, user_buf_sz, &dynptr, user_buf_sz, 0);
+ if (local_err == -EFAULT) { /* Expect page fault */
+ local_err = 0;
+ run_success = 1;
+ }
+out:
+ bpf_dynptr_file_discard(&dynptr);
+ if (local_err)
+ err = local_err;
+ bpf_put_file(file);
+ return 0;
+}
+
+SEC("lsm/file_open")
+int on_open_validate_file_read(void *c)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct elem *work;
+ int key = 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work) {
+ err = 1;
+ return 0;
+ }
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, task_work_callback, NULL);
+ return 0;
+}
+
+/* Called in a sleepable context; read 256K bytes and cross-check against the data read from user space */
+static int task_work_callback(struct bpf_map *map, void *key, void *value)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct file *file = bpf_get_task_exe_file(task);
+
+ if (!file)
+ return 0;
+
+ err = validate_file_read(file);
+ if (!err)
+ run_success = 1;
+ bpf_put_file(file);
+ return 0;
+}
+
+static int verify_dynptr_read(struct bpf_dynptr *ptr, u32 off, char *user_buf, u32 len)
+{
+ int i;
+
+ if (bpf_dynptr_read(tmp_buf, len, ptr, off, 0))
+ return 1;
+
+ /* Verify that the file contents read from BPF match the data read from user space */
+ bpf_for(i, 0, len)
+ {
+ if (tmp_buf[i] != user_buf[i])
+ return 1;
+ }
+ return 0;
+}
+
+static int validate_file_read(struct file *file)
+{
+ struct bpf_dynptr dynptr;
+ int loc_err = 1, off;
+ __u32 user_buf_sz = sizeof(user_buf);
+
+ if (bpf_dynptr_from_file(file, 0, &dynptr))
+ goto cleanup;
+
+ loc_err = verify_dynptr_read(&dynptr, 0, user_buf, user_buf_sz);
+ off = 1;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+ off = user_buf_sz - 1;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, user_buf_sz - off);
+ /* Read the file at an arbitrary offset and length */
+ off = 4097;
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, off, user_buf + off, 100);
+
+ /* Adjust dynptr, verify read */
+ loc_err = loc_err ?: bpf_dynptr_adjust(&dynptr, off, off + 1);
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 1);
+ /* Can't read more than 1 byte */
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 0, user_buf + off, 2) == 0;
+ /* Can't read at an offset beyond the adjusted range */
+ loc_err = loc_err ?: verify_dynptr_read(&dynptr, 1, user_buf + off, 1) == 0;
+
+cleanup:
+ bpf_dynptr_file_discard(&dynptr);
+ return loc_err;
+}
diff --git a/tools/testing/selftests/bpf/progs/file_reader_fail.c b/tools/testing/selftests/bpf/progs/file_reader_fail.c
new file mode 100644
index 000000000000..32fe28ed2439
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/file_reader_fail.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+void *user_ptr;
+
+SEC("lsm/file_open")
+__failure
+__msg("Unreleased reference id=")
+int on_nanosleep_unreleased_ref(void *ctx)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct file *file = bpf_get_task_exe_file(task);
+ struct bpf_dynptr dynptr;
+
+ if (!file)
+ return 0;
+
+ err = bpf_dynptr_from_file(file, 0, &dynptr);
+ return err ? 1 : 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected a dynptr of type file as arg #0")
+int xdp_wrong_dynptr_type(struct xdp_md *xdp)
+{
+ struct bpf_dynptr dynptr;
+
+ bpf_dynptr_from_xdp(xdp, 0, &dynptr);
+ bpf_dynptr_file_discard(&dynptr);
+ return 0;
+}
+
+SEC("xdp")
+__failure
+__msg("Expected an initialized dynptr as arg #0")
+int xdp_no_dynptr_type(struct xdp_md *xdp)
+{
+ struct bpf_dynptr dynptr;
+
+ bpf_dynptr_file_discard(&dynptr);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
index 38034fb82530..02b82774469c 100644
--- a/tools/testing/selftests/bpf/progs/find_vma.c
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -25,7 +25,7 @@ static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
{
if (vma->vm_file)
bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
- vma->vm_file->f_path.dentry->d_iname);
+ vma->vm_file->f_path.dentry->d_shortname.string);
/* check for VM_EXEC */
if (vma->vm_flags & VM_EXEC)
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
new file mode 100644
index 000000000000..82307166f789
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Intel Corporation */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg)
+{
+ bpf_map_delete_elem(map, key);
+ bpf_map_update_elem(map, key, val, 0);
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ (void)skb;
+
+ bpf_for_each_map_elem(&hashmap, cb, NULL, 0);
+
+ return 0;
+}
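
The point of cb() is that deleting and re-inserting the element currently visited by bpf_for_each_map_elem() must not corrupt the walk. A user-space driver could prefill the map and run the tc program once via BPF_PROG_RUN; a minimal sketch, assuming a skeleton generated from this file (names are illustrative):

#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "for_each_hash_modify.skel.h"	/* assumed skeleton name */

static int run_modify_test(void)
{
	struct for_each_hash_modify *skel;
	char pkt[64] = {0};	/* minimal dummy frame for a SCHED_CLS test run */
	LIBBPF_OPTS(bpf_test_run_opts, topts,
		.data_in = pkt,
		.data_size_in = sizeof(pkt),
	);
	unsigned long long key, val;
	int i, err;

	skel = for_each_hash_modify__open_and_load();
	if (!skel)
		return -1;

	for (i = 0; i < 128; i++) {
		key = val = i;
		err = bpf_map_update_elem(bpf_map__fd(skel->maps.hashmap),
					  &key, &val, BPF_ANY);
		if (err)
			goto out;
	}

	err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access),
				     &topts);
	/* on success, re-read all 128 entries here and check they survived */
out:
	for_each_hash_modify__destroy(skel);
	return err;
}
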
diff --git a/tools/testing/selftests/bpf/progs/free_timer.c b/tools/testing/selftests/bpf/progs/free_timer.c
new file mode 100644
index 000000000000..4501ae8fc414
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/free_timer.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#define MAX_ENTRIES 8
+
+struct map_value {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, MAX_ENTRIES);
+} map SEC(".maps");
+
+static int timer_cb(void *map, void *key, struct map_value *value)
+{
+ volatile int sum = 0;
+ int i;
+
+ bpf_for(i, 0, 1024 * 1024) sum += i;
+
+ return 0;
+}
+
+static int start_cb(int key)
+{
+ struct map_value *value;
+
+ value = bpf_map_lookup_elem(&map, (void *)&key);
+ if (!value)
+ return 0;
+
+ bpf_timer_init(&value->timer, &map, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(&value->timer, timer_cb);
+ /* Hope 100us is enough to wake up and run the overwrite thread */
+ bpf_timer_start(&value->timer, 100000, BPF_F_TIMER_CPU_PIN);
+
+ return 0;
+}
+
+static int overwrite_cb(int key)
+{
+ struct map_value zero = {};
+
+ /* Free the timer, which may be running on another CPU */
+ bpf_map_update_elem(&map, (void *)&key, &zero, BPF_ANY);
+
+ return 0;
+}
+
+SEC("syscall")
+int BPF_PROG(start_timer)
+{
+ bpf_loop(MAX_ENTRIES, start_cb, NULL, 0);
+ return 0;
+}
+
+SEC("syscall")
+int BPF_PROG(overwrite_timer)
+{
+ bpf_loop(MAX_ENTRIES, overwrite_cb, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
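
The overwrite_cb() path frees a timer that timer_cb() may still be executing on another CPU, so the useful user-space driver races the two syscall programs from separate threads. A minimal sketch (skeleton name, thread layout and iteration counts are assumptions):

#include <pthread.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include "free_timer.skel.h"	/* assumed skeleton name */

static void *overwrite_thread(void *arg)
{
	struct free_timer *skel = arg;
	LIBBPF_OPTS(bpf_test_run_opts, topts);
	int i;

	for (i = 0; i < 1000; i++)
		bpf_prog_test_run_opts(bpf_program__fd(skel->progs.overwrite_timer),
				       &topts);
	return NULL;
}

static int run_free_timer_race(void)
{
	struct free_timer *skel;
	LIBBPF_OPTS(bpf_test_run_opts, topts);
	pthread_t tid;
	int i, err = 0;

	skel = free_timer__open_and_load();
	if (!skel)
		return -1;

	if (pthread_create(&tid, NULL, overwrite_thread, skel)) {
		free_timer__destroy(skel);
		return -1;
	}

	/* Keep starting timers while the other thread keeps freeing them */
	for (i = 0; i < 1000 && !err; i++)
		err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.start_timer),
					     &topts);

	pthread_join(tid, NULL);
	free_timer__destroy(skel);
	return err;
}
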
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
index 544e5ac90461..d09bbd8ae8a8 100644
--- a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -12,7 +12,7 @@
SEC("freplace/connect_v4_prog")
int new_connect_v4_prog(struct bpf_sock_addr *ctx)
{
- // return value thats in invalid range
+ // return value that's in invalid range
return 255;
}
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
index 7481bb30b29b..195d3b2fba00 100644
--- a/tools/testing/selftests/bpf/progs/htab_update.c
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -6,24 +6,31 @@
char _license[] SEC("license") = "GPL";
+/* Map value type: has BTF-managed field (bpf_timer) */
+struct val {
+ struct bpf_timer t;
+ __u64 payload;
+};
+
struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 1);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(__u32));
+ __type(key, __u32);
+ __type(value, struct val);
} htab SEC(".maps");
int pid = 0;
int update_err = 0;
-SEC("?fentry/lookup_elem_raw")
-int lookup_elem_raw(void *ctx)
+SEC("?fentry/bpf_obj_free_fields")
+int bpf_obj_free_fields(void *ctx)
{
- __u32 key = 0, value = 1;
+ __u32 key = 0;
+ struct val value = { .payload = 1 };
if ((bpf_get_current_pid_tgid() >> 32) != pid)
return 0;
- update_err = bpf_map_update_elem(&htab, &key, &value, 0);
+ update_err = bpf_map_update_elem(&htab, &key, &value, BPF_ANY);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
index 645b2c9f7867..0e87ad1ebcfa 100644
--- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -12,11 +12,6 @@
#define IP_OFFSET 0x1FFF
#define NEXTHDR_FRAGMENT 44
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
- void *buffer, uint32_t buffer__sz) __ksym;
-
volatile int shootdowns = 0;
static bool is_frag_v4(struct iphdr *iph)
diff --git a/tools/testing/selftests/bpf/progs/irq.c b/tools/testing/selftests/bpf/progs/irq.c
new file mode 100644
index 000000000000..74d912b22de9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/irq.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+unsigned long global_flags;
+
+extern void bpf_local_irq_save(unsigned long *) __weak __ksym;
+extern void bpf_local_irq_restore(unsigned long *) __weak __ksym;
+extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
+SEC("?tc")
+__failure __msg("arg#0 doesn't point to an irq flag on stack")
+int irq_save_bad_arg(struct __sk_buff *ctx)
+{
+ bpf_local_irq_save(&global_flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#0 doesn't point to an irq flag on stack")
+int irq_restore_bad_arg(struct __sk_buff *ctx)
+{
+ bpf_local_irq_restore(&global_flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_save(&flags3);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_minus_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_save(&flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_restore(&flags2);
+ return 0;
+}
+
+static __noinline void local_irq_save(unsigned long *flags)
+{
+ bpf_local_irq_save(flags);
+}
+
+static __noinline void local_irq_restore(unsigned long *flags)
+{
+ bpf_local_irq_restore(flags);
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_1_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ local_irq_save(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_2_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_local_irq_save-ed region")
+int irq_restore_missing_3_minus_2_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ local_irq_restore(&flags3);
+ local_irq_restore(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int irq_balance(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ local_irq_save(&flags);
+ local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int irq_balance_n(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ local_irq_restore(&flags3);
+ local_irq_restore(&flags2);
+ local_irq_restore(&flags1);
+ return 0;
+}
+
+static __noinline void local_irq_balance(void)
+{
+ unsigned long flags;
+
+ local_irq_save(&flags);
+ local_irq_restore(&flags);
+}
+
+static __noinline void local_irq_balance_n(void)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save(&flags1);
+ local_irq_save(&flags2);
+ local_irq_save(&flags3);
+ local_irq_restore(&flags3);
+ local_irq_restore(&flags2);
+ local_irq_restore(&flags1);
+}
+
+SEC("?tc")
+__success
+int irq_balance_subprog(struct __sk_buff *ctx)
+{
+ local_irq_balance();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("sleepable helper bpf_copy_from_user#")
+int irq_sleepable_helper(void *ctx)
+{
+ unsigned long flags;
+ u32 data;
+
+ local_irq_save(&flags);
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_copy_from_user_str is sleepable within IRQ-disabled region")
+int irq_sleepable_kfunc(void *ctx)
+{
+ unsigned long flags;
+ u32 data;
+
+ local_irq_save(&flags);
+ bpf_copy_from_user_str(&data, sizeof(data), NULL, 0);
+ local_irq_restore(&flags);
+ return 0;
+}
+
+int __noinline global_local_irq_balance(void)
+{
+ local_irq_balance_n();
+ return 0;
+}
+
+SEC("?tc")
+__success
+int irq_global_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_local_irq_balance();
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_restore(&flags1);
+ bpf_local_irq_restore(&flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo_3(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags2);
+ bpf_local_irq_restore(&flags2);
+ bpf_local_irq_save(&flags3);
+ bpf_local_irq_restore(&flags1);
+ bpf_local_irq_restore(&flags3);
+ return 0;
+}
+
+static __noinline void local_irq_save_3(unsigned long *flags1, unsigned long *flags2,
+ unsigned long *flags3)
+{
+ local_irq_save(flags1);
+ local_irq_save(flags2);
+ local_irq_save(flags3);
+}
+
+SEC("?tc")
+__success
+int irq_restore_3_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save_3(&flags1, &flags2, &flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_restore(&flags2);
+ bpf_local_irq_restore(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_4_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+ unsigned long flags4;
+
+ local_irq_save_3(&flags1, &flags2, &flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_save(&flags4);
+ bpf_local_irq_restore(&flags4);
+ bpf_local_irq_restore(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_restore_ooo_3_subprog(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags2;
+ unsigned long flags3;
+
+ local_irq_save_3(&flags1, &flags2, &flags3);
+ bpf_local_irq_restore(&flags3);
+ bpf_local_irq_restore(&flags2);
+ bpf_local_irq_save(&flags3);
+ bpf_local_irq_restore(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_restore_invalid(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+ unsigned long flags = 0xfaceb00c;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected uninitialized")
+int irq_save_invalid(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+
+ bpf_local_irq_save(&flags1);
+ bpf_local_irq_save(&flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_restore_iter(struct __sk_buff *ctx)
+{
+ struct bpf_iter_num it;
+
+ bpf_iter_num_new(&it, 0, 42);
+ bpf_local_irq_restore((unsigned long *)&it);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=1")
+int irq_save_iter(struct __sk_buff *ctx)
+{
+ struct bpf_iter_num it;
+
+ /* Ensure a same-sized slot has st->ref_obj_id set, so that we reject
+ * based on slot_type != STACK_IRQ_FLAG...
+ */
+ _Static_assert(sizeof(it) == sizeof(unsigned long), "broken iterator size");
+
+ bpf_iter_num_new(&it, 0, 42);
+ bpf_local_irq_save((unsigned long *)&it);
+ bpf_local_irq_restore((unsigned long *)&it);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_flag_overwrite(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ flags = 0xdeadbeef;
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("expected an initialized")
+int irq_flag_overwrite_partial(struct __sk_buff *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ *(((char *)&flags) + 1) = 0xff;
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_refs_array(struct __sk_buff *ctx)
+{
+ unsigned long flags[4];
+ struct { int i; } *p;
+
+ /* refs=1 */
+ bpf_local_irq_save(&flags[0]);
+
+ /* refs=1,2 */
+ p = bpf_obj_new(typeof(*p));
+ if (!p) {
+ bpf_local_irq_restore(&flags[0]);
+ return 0;
+ }
+
+ /* refs=1,2,3 */
+ bpf_local_irq_save(&flags[1]);
+
+ /* refs=1,2,3,4 */
+ bpf_local_irq_save(&flags[2]);
+
+ /* Now when we remove ref=2, the verifier must not break the ordering in
+ * the refs array between 1,3,4. With an older implementation, the
+ * verifier would swap the last element with the removed element, but to
+ * maintain the stack property we need to use memmove.
+ */
+ bpf_obj_drop(p);
+
+ /* Save and restore to reset active_irq_id to 3, as the ordering is now
+ * refs=1,4,3. When restoring, the linear scan will find prev_id in order
+ * as 3 instead of 4.
+ */
+ bpf_local_irq_save(&flags[3]);
+ bpf_local_irq_restore(&flags[3]);
+
+ /* With the incorrect implementation, we can release flags[1], flags[2],
+ * and flags[0], i.e. in the wrong order.
+ */
+ bpf_local_irq_restore(&flags[1]);
+ bpf_local_irq_restore(&flags[2]);
+ bpf_local_irq_restore(&flags[0]);
+ return 0;
+}
+
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__success
+int irq_non_sleepable_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_helper_global_subprog(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_sleepable_helper_subprog(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int irq_sleepable_global_subprog_indirect(void *ctx)
+{
+ unsigned long flags;
+
+ bpf_local_irq_save(&flags);
+ global_subprog_calling_sleepable_global(0);
+ bpf_local_irq_restore(&flags);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot restore irq state out of order")
+int irq_ooo_lock_cond_inv(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&lockB, &flags2)) {
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+ }
+
+ bpf_res_spin_unlock_irqrestore(&lockB, &flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_1(struct __sk_buff *ctx)
+{
+ unsigned long flags1;
+
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags1))
+ return 0;
+ /* For now, bpf_local_irq_restore is not allowed in a critical section,
+ * but this test ensures the error will be caught via kfunc_class once it
+ * is opened up. Verified by temporarily permitting this kfunc in a
+ * critical section.
+ */
+ bpf_local_irq_restore(&flags1);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed")
+int irq_wrong_kfunc_class_2(struct __sk_buff *ctx)
+{
+ unsigned long flags1, flags2;
+
+ bpf_local_irq_save(&flags1);
+ if (bpf_res_spin_lock_irqsave(&lockA, &flags2))
+ return 0;
+ bpf_local_irq_restore(&flags2);
+ bpf_res_spin_unlock_irqrestore(&lockA, &flags1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index ef70b88bccb2..7dd92a303bf6 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -524,11 +524,11 @@ int iter_subprog_iters(const void *ctx)
}
struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, int);
__uint(max_entries, 1000);
-} arr_map SEC(".maps");
+} hash_map SEC(".maps");
SEC("?raw_tp")
__failure __msg("invalid mem access 'scalar'")
@@ -539,7 +539,7 @@ int iter_err_too_permissive1(const void *ctx)
MY_PID_GUARD();
- map_val = bpf_map_lookup_elem(&arr_map, &key);
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
if (!map_val)
return 0;
@@ -561,12 +561,12 @@ int iter_err_too_permissive2(const void *ctx)
MY_PID_GUARD();
- map_val = bpf_map_lookup_elem(&arr_map, &key);
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
if (!map_val)
return 0;
bpf_repeat(1000000) {
- map_val = bpf_map_lookup_elem(&arr_map, &key);
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
}
*map_val = 123;
@@ -585,7 +585,7 @@ int iter_err_too_permissive3(const void *ctx)
MY_PID_GUARD();
bpf_repeat(1000000) {
- map_val = bpf_map_lookup_elem(&arr_map, &key);
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
found = true;
}
@@ -606,7 +606,7 @@ int iter_tricky_but_fine(const void *ctx)
MY_PID_GUARD();
bpf_repeat(1000000) {
- map_val = bpf_map_lookup_elem(&arr_map, &key);
+ map_val = bpf_map_lookup_elem(&hash_map, &key);
if (map_val) {
found = true;
break;
@@ -1175,6 +1175,122 @@ __naked int loop_state_deps2(void)
}
SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps3(void)
+{
+ /* This is equivalent to a C program below.
+ *
+ * if (random() != 24) { // assume false branch is placed first
+ * i = iter_new(); // fp[-8]
+ * while (iter_next(i));
+ * iter_destroy(i);
+ * return;
+ * }
+ *
+ * for (i = 10; i > 0; i--); // increase dfs_depth for child states
+ *
+ * i = iter_new(); // fp[-8]
+ * b = -24; // r8
+ * for (;;) { // checkpoint (L)
+ * if (iter_next(i)) // checkpoint (N)
+ * break;
+ * if (random() == 77) { // assume false branch is placed first
+ * *(u64 *)(r10 + b) = 7; // this is not safe when b == -25
+ * iter_destroy(i);
+ * return;
+ * }
+ * if (random() == 42) { // assume false branch is placed first
+ * b = -25;
+ * }
+ * }
+ * iter_destroy(i);
+ *
+ * In case of a buggy verifier, the first loop might poison
+ * env->cur_state->loop_entry with a state having 0 branches
+ * and a small dfs_depth. This would trigger NOT_EXACT state
+ * comparison for some states within the second loop.
+ * Specifically, checkpoint (L) might be problematic if:
+ * - branch with '*(u64 *)(r10 + b) = 7' is not explored yet;
+ * - checkpoint (L) is first reached in state {b=-24};
+ * - traversal is pruned at checkpoint (N) setting checkpoint's (L)
+ * branch count to 0, thus making it eligible for use in pruning;
+ * - checkpoint (L) is next reached in state {b=-25},
+ * this would cause NOT_EXACT comparison with a state {b=-24}
+ * while 'b' is not marked precise yet.
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 24 goto 2f;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 5;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 != 0 goto 1b;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "2:"
+ /* loop to increase dfs_depth */
+ "r0 = 10;"
+ "3:"
+ "r0 -= 1;"
+ "if r0 != 0 goto 3b;"
+ /* end of loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r8 = -24;"
+ "main_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto main_loop_end_%=;"
+ /* first if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 77 goto unsafe_write_%=;"
+ /* second if */
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto poison_r8_%=;"
+ /* iterate */
+ "goto main_loop_%=;"
+ "main_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+
+ "unsafe_write_%=:"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto main_loop_end_%=;"
+
+ "poison_r8_%=:"
+ "r8 = -25;"
+ "goto main_loop_%=;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
__success
__naked int triple_continue(void)
{
@@ -1486,4 +1602,328 @@ int iter_subprog_check_stacksafe(const void *ctx)
return 0;
}
+struct bpf_iter_num global_it;
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_new_bad_arg(const void *ctx)
+{
+ bpf_iter_num_new(&global_it, 0, 1);
+ return 0;
+}
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_next_bad_arg(const void *ctx)
+{
+ bpf_iter_num_next(&global_it);
+ return 0;
+}
+
+SEC("raw_tp")
+__failure __msg("arg#0 expected pointer to an iterator on stack")
+int iter_destroy_bad_arg(const void *ctx)
+{
+ bpf_iter_num_destroy(&global_it);
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int clean_live_states(const void *ctx)
+{
+ char buf[1];
+ int i, j, k, l, m, n, o;
+
+ bpf_for(i, 0, 10)
+ bpf_for(j, 0, 10)
+ bpf_for(k, 0, 10)
+ bpf_for(l, 0, 10)
+ bpf_for(m, 0, 10)
+ bpf_for(n, 0, 10)
+ bpf_for(o, 0, 10) {
+ if (unlikely(bpf_get_prandom_u32()))
+ buf[0] = 42;
+ bpf_printk("%s", buf);
+ }
+ return 0;
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "call noop;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int noop(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * r8 = bpf_get_prandom_u32();
+ * r6 = -32;
+ * bpf_iter_num_new(&fp[-8], 0, 10);
+ * if (unlikely(bpf_get_prandom_u32())) {
+ * r6 = -31;
+ * jump_into_loop:
+ * goto +0;
+ * goto loop;
+ * }
+ * if (unlikely(bpf_get_prandom_u32()))
+ * goto jump_into_loop;
+ * loop:
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "r7 = 0;"
+ "r6 = -32;"
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto jump_into_loop_%=;"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "jump_into_loop_%=: "
+ "goto +0;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("misaligned stack access off 0+-31+0 size 8")
+__naked int absent_mark_in_the_middle_state3(void)
+{
+ /*
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * loop1(-32, &fp[-8])
+ * loop1_wrapper(&fp[-8])
+ * bpf_iter_num_destroy(&fp[-8])
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ /* call #1 */
+ "r1 = -32;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call loop1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ /* call #2 */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call loop1_wrapper;"
+ /* return */
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1(void)
+{
+ /*
+ * int loop1(num, iter) {
+ * r6 = num;
+ * r7 = iter;
+ * while (bpf_iter_num_next(r7)) {
+ * if (unlikely(bpf_get_prandom_u32()))
+ * *(fp + r6) = 7;
+ * }
+ * return 0
+ * }
+ */
+ asm volatile (
+ "r6 = r1;"
+ "r7 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "loop_%=:"
+ "r1 = r7;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto use_r6_%=;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r0 = 0;"
+ "exit;"
+ "use_r6_%=:"
+ "r0 = r10;"
+ "r0 += r6;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+__used __naked
+static int loop1_wrapper(void)
+{
+ /*
+ * int loop1_wrapper(iter) {
+ * r6 = -32;
+ * r7 = iter;
+ * if (unlikely(bpf_get_prandom_u32()))
+ * r6 = -31;
+ * loop1(r6, r7);
+ * return 0;
+ * }
+ */
+ asm volatile (
+ "r6 = -32;"
+ "r7 = r1;"
+ "call %[bpf_get_prandom_u32];"
+ "r8 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == r8 goto change_r6_%=;"
+ "loop_%=:"
+ "r1 = r6;"
+ "r2 = r7;"
+ "call loop1;"
+ "r0 = 0;"
+ "exit;"
+ "change_r6_%=:"
+ "r6 = -31;"
+ "goto loop_%=;"
+ :
+ : __imm(bpf_iter_num_next),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
index 05fa5ce7fc59..d00fd570255a 100644
--- a/tools/testing/selftests/bpf/progs/iters_looping.c
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -161,3 +161,56 @@ int simplest_loop(void *ctx)
return 0;
}
+
+__used
+static void iterator_with_diff_stack_depth(int x)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "if r1 == 42 goto 0f;"
+ "*(u64 *)(r10 - 128) = 0;"
+ "0:"
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "goto 1b;"
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("socket")
+__success
+__naked int widening_stack_size_bug(void *ctx)
+{
+ /*
+ * Depending on the iterator_with_diff_stack_depth() parameter value, the
+ * subprogram stack depth is either 8 or 128 bytes. Arrange values so
+ * that it is 128 on the first call and 8 on the second. This triggered a
+ * bug in the verifier's widen_imprecise_scalars() logic.
+ */
+ asm volatile (
+ "r6 = 0;"
+ "r1 = 0;"
+ "1:"
+ "call iterator_with_diff_stack_depth;"
+ "r1 = 42;"
+ "r6 += 1;"
+ "if r6 < 2 goto 1b;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
index d47e59aba6de..d273b46dfc7c 100644
--- a/tools/testing/selftests/bpf/progs/iters_state_safety.c
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -30,7 +30,7 @@ int force_clang_to_emit_btf_for_externs(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int create_and_destroy(void *ctx)
{
struct bpf_iter_num iter;
@@ -73,7 +73,7 @@ int create_and_forget_to_destroy_fail(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
int destroy_without_creating_fail(void *ctx)
{
/* init with zeros to stop verifier complaining about uninit stack */
@@ -91,7 +91,7 @@ int destroy_without_creating_fail(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
int compromise_iter_w_direct_write_fail(void *ctx)
{
struct bpf_iter_num iter;
@@ -143,7 +143,7 @@ int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
int compromise_iter_w_helper_write_fail(void *ctx)
{
struct bpf_iter_num iter;
@@ -196,7 +196,7 @@ int leak_iter_from_subprog_fail(void *ctx)
SEC("?raw_tp")
__success __log_level(2)
-__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+__msg("fp-8=iter_num(ref_id=1,state=active,depth=0)")
int valid_stack_reuse(void *ctx)
{
struct bpf_iter_num iter;
@@ -230,7 +230,7 @@ int valid_stack_reuse(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected uninitialized iter_num as arg #1")
+__failure __msg("expected uninitialized iter_num as arg #0")
int double_create_fail(void *ctx)
{
struct bpf_iter_num iter;
@@ -258,7 +258,7 @@ int double_create_fail(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
int double_destroy_fail(void *ctx)
{
struct bpf_iter_num iter;
@@ -284,7 +284,7 @@ int double_destroy_fail(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
int next_without_new_fail(void *ctx)
{
struct bpf_iter_num iter;
@@ -305,7 +305,7 @@ int next_without_new_fail(void *ctx)
}
SEC("?raw_tp")
-__failure __msg("expected an initialized iter_num as arg #1")
+__failure __msg("expected an initialized iter_num as arg #0")
int next_after_destroy_fail(void *ctx)
{
struct bpf_iter_num iter;
@@ -345,7 +345,7 @@ int __naked read_from_iter_slot_fail(void)
"r3 = 1000;"
"call %[bpf_iter_num_new];"
- /* attemp to leak bpf_iter_num state */
+ /* attempt to leak bpf_iter_num state */
"r7 = *(u64 *)(r6 + 0);"
"r8 = *(u64 *)(r6 + 8);"
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
index 6b1588d70652..fe3663dedbe1 100644
--- a/tools/testing/selftests/bpf/progs/iters_task_failure.c
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -15,7 +15,7 @@ void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+__failure __msg("kernel func bpf_iter_task_new requires RCU critical section protection")
int BPF_PROG(iter_tasks_without_lock)
{
struct task_struct *pos;
@@ -27,7 +27,7 @@ int BPF_PROG(iter_tasks_without_lock)
}
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
-__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+__failure __msg("kernel func bpf_iter_css_new requires RCU critical section protection")
int BPF_PROG(iter_css_without_lock)
{
u64 cg_id = bpf_get_current_cgroup_id();
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod.c b/tools/testing/selftests/bpf/progs/iters_testmod.c
index df1d3db60b1b..5379e9960ffd 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod.c
@@ -4,7 +4,7 @@
#include "bpf_experimental.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
@@ -123,3 +123,49 @@ out:
bpf_iter_num_destroy(&num_it);
return 0;
}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test requires RCU critical section protection")
+int iter_ret_rcu_test_protected(const void *ctx)
+{
+ struct task_struct *p;
+
+ p = bpf_kfunc_ret_rcu_test();
+ return p->pid;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=")
+int iter_ret_rcu_test_type(const void *ctx)
+{
+ struct task_struct *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test();
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_kfunc_ret_rcu_test_nostruct requires RCU critical section protection")
+int iter_ret_rcu_test_protected_nostruct(const void *ctx)
+{
+ void *p;
+
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ return *(int *)p;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("R1 type=rdonly_rcu_mem_or_null expected=")
+int iter_ret_rcu_test_type_nostruct(const void *ctx)
+{
+ void *p;
+
+ bpf_rcu_read_lock();
+ p = bpf_kfunc_ret_rcu_test_nostruct(4);
+ bpf_this_cpu_ptr(p);
+ bpf_rcu_read_unlock();
+ return 0;
+}
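
The four additions above only exercise the failure paths for RCU-protected kfunc return values. For contrast, a sketch of the usage the verifier is expected to accept — take the RCU read lock, NULL-check the returned rcu pointer, then dereference it — could look roughly like this (illustrative only, not part of the patch; bpf_kfunc_ret_rcu_test comes from the test module and the __ksym declarations may already be provided by the included headers):

	void bpf_rcu_read_lock(void) __ksym;
	void bpf_rcu_read_unlock(void) __ksym;

	SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
	int iter_ret_rcu_test_ok(const void *ctx)
	{
		struct task_struct *p;
		int pid = 0;

		bpf_rcu_read_lock();
		p = bpf_kfunc_ret_rcu_test();
		if (p)
			pid = p->pid;	/* plain read is allowed on an RCU pointer inside the CS */
		bpf_rcu_read_unlock();
		return pid;
	}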
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
index 4a176e6aede8..83791348bed5 100644
--- a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -20,7 +20,7 @@ __s64 res_empty;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_empty(const void *ctx)
@@ -38,7 +38,7 @@ __s64 res_full;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_full(const void *ctx)
@@ -58,7 +58,7 @@ static volatile int zero = 0;
SEC("raw_tp/sys_enter")
__success __log_level(2)
-__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=active,depth=0)")
__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
__msg("call bpf_iter_testmod_seq_destroy")
int testmod_seq_truncated(const void *ctx)
@@ -79,7 +79,7 @@ int testmod_seq_truncated(const void *ctx)
SEC("?raw_tp")
__failure
-__msg("expected an initialized iter_testmod_seq as arg #2")
+__msg("expected an initialized iter_testmod_seq as arg #1")
int testmod_seq_getter_before_bad(const void *ctx)
{
struct bpf_iter_testmod_seq it;
@@ -89,7 +89,7 @@ int testmod_seq_getter_before_bad(const void *ctx)
SEC("?raw_tp")
__failure
-__msg("expected an initialized iter_testmod_seq as arg #2")
+__msg("expected an initialized iter_testmod_seq as arg #1")
int testmod_seq_getter_after_bad(const void *ctx)
{
struct bpf_iter_testmod_seq it;
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
index f9789e668297..82190d79de37 100644
--- a/tools/testing/selftests/bpf/progs/jit_probe_mem.c
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -3,7 +3,7 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
static struct prog_test_ref_kfunc __kptr *v;
long total_sum = -1;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
index 7632d9ecb253..b9670e9a6e3d 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_destructive_test(void)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
index 08fae306539c..a1963497f0bf 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
struct syscall_test_args {
__u8 data[16];
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
index d532af07decf..48f64827cd93 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_race.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_call_fail(struct __sk_buff *ctx)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index f502f755f567..8b86113a0126 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2021 Facebook */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_call_test4(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index 2380c75e74ce..8e150e85b50d 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
extern const int bpf_prog_active __ksym;
int active_res = -1;
diff --git a/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
new file mode 100644
index 000000000000..f77aef0474d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_write_ctx.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#if defined(__TARGET_ARCH_x86)
+SEC("kprobe")
+int kprobe_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+
+SEC("kprobe.multi")
+int kprobe_multi_write_ctx(struct pt_regs *ctx)
+{
+ ctx->ax = 0;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
index 6438982b928b..ddd26d1a083f 100644
--- a/tools/testing/selftests/bpf/progs/linked_list_fail.c
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -226,8 +226,7 @@ int obj_new_no_composite(void *ctx)
SEC("?tc")
int obj_new_no_struct(void *ctx)
{
-
- bpf_obj_new(union { int data; unsigned udata; });
+ (void)bpf_obj_new(union { int data; unsigned udata; });
return 0;
}
@@ -252,7 +251,7 @@ int new_null_ret(void *ctx)
SEC("?tc")
int obj_new_acq(void *ctx)
{
- bpf_obj_new(struct foo);
+ (void)bpf_obj_new(struct foo);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/linked_list_peek.c b/tools/testing/selftests/bpf/progs/linked_list_peek.c
new file mode 100644
index 000000000000..264e81bfb287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_peek.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_list_node l;
+ int key;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(node_data, l);
+
+#define list_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+SEC("syscall")
+__retval(0)
+long list_peek(void *ctx)
+{
+ struct bpf_list_node *l_n;
+ struct node_data *n;
+ int i, err = 0;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_front(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ bpf_spin_lock(&glock);
+ l_n = bpf_list_back(&ghead);
+ bpf_spin_unlock(&glock);
+ if (l_n)
+ return __LINE__;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+ n->key = i;
+ bpf_spin_lock(&glock);
+ bpf_list_push_back(&ghead, &n->l);
+ bpf_spin_unlock(&glock);
+ }
+
+ bpf_spin_lock(&glock);
+
+ l_n = bpf_list_front(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != 0) {
+ err = __LINE__;
+ goto done;
+ }
+
+ l_n = bpf_list_back(&ghead);
+ if (!l_n) {
+ err = __LINE__;
+ goto done;
+ }
+
+ n = list_entry(l_n, struct node_data, l);
+ if (n->key != NR_NODES - 1) {
+ err = __LINE__;
+ goto done;
+ }
+
+done:
+ bpf_spin_unlock(&glock);
+ return err;
+}
+
+#define TEST_FB(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_list_node *l_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock); \
+ l_n = bpf_list_##op(&ghead); \
+ if (l_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock); \
+ \
+ return !!jiffies; \
+}
+
+#define MSG "call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_FB(front, true)
+TEST_FB(back, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_list_head"
+TEST_FB(front, false)
+TEST_FB(back, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
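
For readability, here is roughly what one expansion of the TEST_FB macro above produces — TEST_FB(front, true) under the first MSG — spelled out as ordinary C (a sketch of the preprocessor output, not an extra test added by the patch):

	SEC("syscall")
	__failure __msg("call bpf_list_{{(front|back).+}}; R0{{(_w)?}}=ptr_or_null_node_data(id={{[0-9]+}},non_own_ref")
	long test_front_spinlock_true(void *ctx)
	{
		struct bpf_list_node *l_n;
		__u64 jiffies = 0;

		if (true)
			bpf_spin_lock(&glock);
		l_n = bpf_list_front(&ghead);
		if (l_n)
			jiffies = bpf_jiffies64();
		if (true)
			bpf_spin_unlock(&glock);

		return !!jiffies;
	}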
diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
new file mode 100644
index 000000000000..15579d5bcd91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int fentry_hit;
+int fexit_hit;
+int my_pid;
+
+SEC("fentry/cmdline_proc_show")
+int BPF_PROG(fentry_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fentry_hit = 1;
+ return 0;
+}
+
+SEC("fexit/cmdline_proc_show")
+int BPF_PROG(fexit_cmdline)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ fexit_hit = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
index b092a72b2c9d..d736506a4c80 100644
--- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -6,7 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "../bpf_experimental.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
struct plain_local;
diff --git a/tools/testing/selftests/bpf/progs/loop1.c b/tools/testing/selftests/bpf/progs/loop1.c
index 50e66772c046..b0fa26fb4760 100644
--- a/tools/testing/selftests/bpf/progs/loop1.c
+++ b/tools/testing/selftests/bpf/progs/loop1.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop2.c b/tools/testing/selftests/bpf/progs/loop2.c
index 947bb7e988c2..0227409d4b0e 100644
--- a/tools/testing/selftests/bpf/progs/loop2.c
+++ b/tools/testing/selftests/bpf/progs/loop2.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 717dab14322b..5d1c9a775e6b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -1,11 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/sched.h>
-#include <linux/ptrace.h>
-#include <stdint.h>
-#include <stddef.h>
-#include <stdbool.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
index e4ff97fbcce1..dd36aff4fba3 100644
--- a/tools/testing/selftests/bpf/progs/loop6.c
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -1,8 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/ptrace.h>
-#include <stddef.h>
-#include <linux/bpf.h>
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -26,12 +25,6 @@ char _license[] SEC("license") = "GPL";
#define SG_CHAIN 0x01UL
#define SG_END 0x02UL
-struct scatterlist {
- unsigned long page_link;
- unsigned int offset;
- unsigned int length;
-};
-
#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
#define sg_is_last(sg) ((sg)->page_link & SG_END)
#define sg_chain_ptr(sg) \
@@ -62,7 +55,7 @@ static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
return sgp;
}
-int config = 0;
+int run_once = 0;
int result = 0;
SEC("kprobe/virtqueue_add_sgs")
@@ -73,14 +66,14 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__u64 length1 = 0, length2 = 0;
unsigned int i, n, len;
- if (config != 0)
+ if (run_once != 0)
return 0;
for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
__sink(out_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length1 += len;
n++;
}
@@ -90,13 +83,13 @@ int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
__sink(in_sgs);
for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
sgp = __sg_next(sgp)) {
- bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ len = BPF_CORE_READ(sgp, length);
length2 += len;
n++;
}
}
- config = 1;
+ run_once = 1;
result = length2 - length1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie.h b/tools/testing/selftests/bpf/progs/lpm_trie.h
new file mode 100644
index 000000000000..76aa5821807f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_LPM_TRIE_H
+#define __PROGS_LPM_TRIE_H
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+/* Benchmark operations */
+enum {
+ LPM_OP_NOOP = 0,
+ LPM_OP_BASELINE,
+ LPM_OP_LOOKUP,
+ LPM_OP_INSERT,
+ LPM_OP_UPDATE,
+ LPM_OP_DELETE,
+ LPM_OP_FREE
+};
+
+/*
+ * Return values from run_bench.
+ *
+ * Negative values are also allowed and represent kernel error codes.
+ */
+#define LPM_BENCH_SUCCESS 0
+#define LPM_BENCH_REINIT_MAP 1 /* Reset trie to initial state for current op */
+
+#endif
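
The header above only defines the key layout and the op/return-code contract; the trie itself is populated from userspace (the bench program below refers to a fill_map() helper). A minimal, hypothetical version of that fill loop using libbpf might look like the following — the body, fixed /32 prefix length, and error handling are illustrative, not taken from the patch:

	#include <errno.h>
	#include <bpf/bpf.h>
	#include <linux/bpf.h>
	/* assumes lpm_trie.h (struct trie_key) is on the include path */

	/* Illustrative sketch: insert nr_entries sequential keys so the
	 * LOOKUP/UPDATE/DELETE ops have something to operate on.
	 */
	static int fill_map(int map_fd, __u32 nr_entries)
	{
		struct trie_key key = { .prefixlen = 32 };
		__u32 i, val = 1;

		for (i = 0; i < nr_entries; i++) {
			key.data = i;
			if (bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST))
				return -errno;
		}
		return 0;
	}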
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_bench.c b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
new file mode 100644
index 000000000000..a0e6ebd5507a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_bench.c
@@ -0,0 +1,230 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Cloudflare */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_atomic.h"
+#include "progs/lpm_trie.h"
+
+#define BPF_OBJ_NAME_LEN 16U
+#define MAX_ENTRIES 100000000
+#define NR_LOOPS 10000
+
+char _license[] SEC("license") = "GPL";
+
+/* Filled by userspace. See fill_map() in bench_lpm_trie_map.c */
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_map SEC(".maps");
+
+long hits;
+long duration_ns;
+
+/* Configured from userspace */
+__u32 nr_entries;
+__u32 prefixlen;
+bool random;
+__u8 op;
+
+static __u64 latency_free_start;
+
+SEC("fentry/bpf_map_free_deferred")
+int BPF_PROG(trie_free_entry, struct work_struct *work)
+{
+ struct bpf_map *map = container_of(work, struct bpf_map, work);
+ char name[BPF_OBJ_NAME_LEN];
+ u32 map_type;
+
+ map_type = BPF_CORE_READ(map, map_type);
+ if (map_type != BPF_MAP_TYPE_LPM_TRIE)
+ return 0;
+
+ /*
+ * Ideally we'd have access to the map ID but that's already
+ * freed before we enter trie_free().
+ */
+ BPF_CORE_READ_STR_INTO(&name, map, name);
+ if (bpf_strncmp(name, BPF_OBJ_NAME_LEN, "trie_free_map"))
+ return 0;
+
+ latency_free_start = bpf_ktime_get_ns();
+
+ return 0;
+}
+
+SEC("fexit/bpf_map_free_deferred")
+int BPF_PROG(trie_free_exit, struct work_struct *work)
+{
+ __u64 val;
+
+ if (!latency_free_start)
+ return 0;
+
+ val = bpf_ktime_get_ns() - latency_free_start;
+ latency_free_start = 0;
+
+ __sync_add_and_fetch(&duration_ns, val);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+static __u32 cur_key;
+
+static __always_inline void generate_key(struct trie_key *key)
+{
+ key->prefixlen = prefixlen;
+
+ if (random)
+ key->data = bpf_get_prandom_u32() % nr_entries;
+ else
+ key->data = cur_key++ % nr_entries;
+}
+
+static int noop(__u32 index, __u32 *unused)
+{
+ return 0;
+}
+
+static int baseline(__u32 index, __u32 *unused)
+{
+ struct trie_key key;
+ __u32 blackbox = 0;
+
+ generate_key(&key);
+ /* Avoid compiler optimizing out the modulo */
+ barrier_var(blackbox);
+ blackbox = READ_ONCE(key.data);
+
+ return 0;
+}
+
+static int lookup(__u32 index, int *retval)
+{
+ struct trie_key key;
+
+ generate_key(&key);
+ if (!bpf_map_lookup_elem(&trie_map, &key)) {
+ *retval = -ENOENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_NOEXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Is this the last entry? */
+ if (key.data == nr_entries - 1) {
+ /* For atomicity concerns, see the comment in delete() */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int update(__u32 index, int *retval)
+{
+ struct trie_key key;
+ u32 val = 1;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_update_elem(&trie_map, &key, &val, BPF_EXIST);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int delete(__u32 index, int *retval)
+{
+ struct trie_key key;
+ int err;
+
+ generate_key(&key);
+ err = bpf_map_delete_elem(&trie_map, &key);
+ if (err) {
+ *retval = err;
+ return 1;
+ }
+
+ /* Do we need to refill the map? */
+ if (key.data == nr_entries - 1) {
+ /*
+ * Atomicity isn't required because DELETE only supports
+ * one producer running concurrently. What we need is a
+ * way to track how many entries have been deleted from
+ * the trie between consecutive invocations of the BPF
+ * prog because a single bpf_loop() call might not
+ * delete all entries, e.g. when NR_LOOPS < nr_entries.
+ */
+ *retval = LPM_BENCH_REINIT_MAP;
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("xdp")
+int BPF_PROG(run_bench)
+{
+ int err = LPM_BENCH_SUCCESS;
+ u64 start, delta;
+ int loops;
+
+ start = bpf_ktime_get_ns();
+
+ switch (op) {
+ case LPM_OP_NOOP:
+ loops = bpf_loop(NR_LOOPS, noop, NULL, 0);
+ break;
+ case LPM_OP_BASELINE:
+ loops = bpf_loop(NR_LOOPS, baseline, NULL, 0);
+ break;
+ case LPM_OP_LOOKUP:
+ loops = bpf_loop(NR_LOOPS, lookup, &err, 0);
+ break;
+ case LPM_OP_INSERT:
+ loops = bpf_loop(NR_LOOPS, insert, &err, 0);
+ break;
+ case LPM_OP_UPDATE:
+ loops = bpf_loop(NR_LOOPS, update, &err, 0);
+ break;
+ case LPM_OP_DELETE:
+ loops = bpf_loop(NR_LOOPS, delete, &err, 0);
+ break;
+ default:
+ bpf_printk("invalid benchmark operation\n");
+ return -1;
+ }
+
+ delta = bpf_ktime_get_ns() - start;
+
+ __sync_add_and_fetch(&duration_ns, delta);
+ __sync_add_and_fetch(&hits, loops);
+
+ return err;
+}
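
run_bench only accumulates raw counters (hits and duration_ns); turning them into a report is left to the userspace harness, presumably along these lines (hypothetical arithmetic, not code from the patch):

	/* ops/sec for the NOOP/BASELINE/LOOKUP/INSERT/UPDATE/DELETE ops */
	double ops_per_sec = (double)hits * 1e9 / (double)duration_ns;

	/* for the FREE op the fentry/fexit pair above makes the same
	 * counters yield a mean per-free latency instead
	 */
	double avg_free_ns = (double)duration_ns / (double)hits;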
diff --git a/tools/testing/selftests/bpf/progs/lpm_trie_map.c b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
new file mode 100644
index 000000000000..6e60d686b664
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lpm_trie_map.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_ENTRIES 100000000
+
+struct trie_key {
+ __u32 prefixlen;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __type(key, struct trie_key);
+ __type(value, __u32);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+} trie_free_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index 0c13b7409947..7de173daf27b 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -89,14 +89,16 @@ SEC("lsm/file_mprotect")
int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
unsigned long reqprot, unsigned long prot, int ret)
{
- if (ret != 0)
+ struct mm_struct *mm = vma->vm_mm;
+
+ if (ret != 0 || !mm)
return ret;
__s32 pid = bpf_get_current_pid_tgid() >> 32;
int is_stack = 0;
- is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ is_stack = (vma->vm_start <= mm->start_stack &&
+ vma->vm_end >= mm->start_stack);
if (is_stack && monitored_pid == pid) {
mprotect_count++;
diff --git a/tools/testing/selftests/bpf/progs/lsm_tailcall.c b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
index 49c075ce2d4c..6e7e58051e64 100644
--- a/tools/testing/selftests/bpf/progs/lsm_tailcall.c
+++ b/tools/testing/selftests/bpf/progs/lsm_tailcall.c
@@ -20,14 +20,14 @@ int lsm_file_permission_prog(void *ctx)
return 0;
}
-SEC("lsm/file_alloc_security")
-int lsm_file_alloc_security_prog(void *ctx)
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_prog(void *ctx)
{
return 0;
}
-SEC("lsm/file_alloc_security")
-int lsm_file_alloc_security_entry(void *ctx)
+SEC("lsm/kernfs_init_security")
+int lsm_kernfs_init_security_entry(void *ctx)
{
bpf_tail_call_static(ctx, &jmp_table, 0);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/map_excl.c b/tools/testing/selftests/bpf/progs/map_excl.c
new file mode 100644
index 000000000000..d461684728e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_excl.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Google LLC. */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} excl_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int should_not_have_access(void *ctx)
+{
+ int key = 0, value = 0xdeadbeef;
+
+ bpf_map_update_elem(&excl_map, &key, &value, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index ab0ce1d01a4a..edaba481db9d 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -2,7 +2,7 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
struct map_value {
struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
index 450bb373b179..4c0ff01f1a96 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
struct map_value {
char buf[8];
@@ -345,7 +345,7 @@ int reject_indirect_global_func_access(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=5 alloc_insn=")
+__failure __msg("Unreleased reference id=4 alloc_insn=")
int kptr_xchg_ref_state(struct __sk_buff *ctx)
{
struct prog_test_ref_kfunc *p;
diff --git a/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
new file mode 100644
index 000000000000..3b984b6ae7c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mem_rdonly_untrusted.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r8 = *(u64 *)(r7 +0) ; R7=ptr_nameidata(off={{[0-9]+}}) R8=rdonly_untrusted_mem(sz=0)")
+__msg("r9 = *(u8 *)(r8 +0) ; R8=rdonly_untrusted_mem(sz=0) R9=scalar")
+int btf_id_to_ptr_mem(void *ctx)
+{
+ struct task_struct *task;
+ struct nameidata *idata;
+ u64 ret, off;
+
+ task = bpf_get_current_task_btf();
+ idata = task->nameidata;
+ off = bpf_core_field_offset(struct nameidata, pathname);
+ /*
+ * asm block to have reliable match target for __msg, equivalent of:
+ * ret = task->nameidata->pathname[0];
+ */
+ asm volatile (
+ "r7 = %[idata];"
+ "r7 += %[off];"
+ "r8 = *(u64 *)(r7 + 0);"
+ "r9 = *(u8 *)(r8 + 0);"
+ "%[ret] = r9;"
+ : [ret]"=r"(ret)
+ : [idata]"r"(idata),
+ [off]"r"(off)
+ : "r7", "r8", "r9");
+ return ret;
+}
+
+SEC("socket")
+__success
+__retval(0)
+int ldx_is_ok_bad_addr(void *ctx)
+{
+ char *p;
+
+ if (!bpf_core_enum_value_exists(enum bpf_features, BPF_FEAT_RDONLY_CAST_TO_VOID))
+ return 42;
+
+ p = bpf_rdonly_cast(0, 0);
+ return p[0x7fff];
+}
+
+SEC("socket")
+__success
+__retval(1)
+int ldx_is_ok_good_addr(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return *p;
+}
+
+SEC("socket")
+__success
+int offset_not_tracked(void *ctx)
+{
+ int *p, i, s;
+
+ p = bpf_rdonly_cast(0, 0);
+ s = 0;
+ bpf_for(i, 0, 1000 * 1000 * 1000) {
+ p++;
+ s += *p;
+ }
+ return s;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int stx_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ *p = 1;
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_not_ok(void *ctx)
+{
+ int v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ __sync_fetch_and_add(p, 1);
+ return 0;
+}
+
+SEC("socket")
+__failure
+__msg("cannot write into rdonly_untrusted_mem")
+int atomic_rmw_not_ok(void *ctx)
+{
+ long v, *p;
+
+ v = 1;
+ p = bpf_rdonly_cast(&v, 0);
+ return __sync_val_compare_and_swap(p, 0, 42);
+}
+
+SEC("socket")
+__failure
+__msg("invalid access to memory, mem_size=0 off=0 size=4")
+__msg("R1 min value is outside of the allowed memory range")
+int kfunc_param_not_ok(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ bpf_kfunc_trusted_num_test(p);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure
+__msg("R1 type=rdonly_untrusted_mem expected=")
+int helper_param_not_ok(void *ctx)
+{
+ char *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ /*
+ * Any helper with ARG_CONST_SIZE_OR_ZERO constraint will do,
+ * the most permissive constraint
+ */
+ bpf_copy_from_user(p, 0, (void *)42);
+ return 0;
+}
+
+static __noinline u64 *get_some_addr(void)
+{
+ if (bpf_get_prandom_u32())
+ return bpf_rdonly_cast(0, bpf_core_type_id_kernel(struct sock));
+ else
+ return bpf_rdonly_cast(0, 0);
+}
+
+SEC("socket")
+__success
+__retval(0)
+int mixed_mem_type(void *ctx)
+{
+ u64 *p;
+
+ /* Try to avoid compiler hoisting load to if branches by using __noinline func. */
+ p = get_some_addr();
+ return *p;
+}
+
+__attribute__((__aligned__(8)))
+u8 global[] = {
+ 0x11, 0x22, 0x33, 0x44,
+ 0x55, 0x66, 0x77, 0x88,
+ 0x99
+};
+
+__always_inline
+static u64 combine(void *p)
+{
+ u64 acc;
+
+ acc = 0;
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ acc |= (*(u64 *)p >> 56) << 24;
+ acc |= (*(u32 *)p >> 24) << 16;
+ acc |= (*(u16 *)p >> 8) << 8;
+ acc |= *(u8 *)p;
+#else
+ acc |= (*(u64 *)p & 0xff) << 24;
+ acc |= (*(u32 *)p & 0xff) << 16;
+ acc |= (*(u16 *)p & 0xff) << 8;
+ acc |= *(u8 *)p;
+#endif
+ return acc;
+}
+
+SEC("socket")
+__retval(0x88442211)
+int diff_size_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0));
+}
+
+SEC("socket")
+__retval(0x99553322)
+int misaligned_access(void *ctx)
+{
+ return combine(bpf_rdonly_cast(&global, 0) + 1);
+}
+
+__weak int return_one(void)
+{
+ return 1;
+}
+
+SEC("socket")
+__success
+__retval(1)
+int null_check(void *ctx)
+{
+ int *p;
+
+ p = bpf_rdonly_cast(0, 0);
+ if (p == 0)
+ /* make this a function call to avoid compiler
+ * moving r0 assignment before check.
+ */
+ return return_one();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c
index 7f9ef701f5de..51a4fe64c917 100644
--- a/tools/testing/selftests/bpf/progs/missed_kprobe.c
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c
@@ -2,7 +2,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
index 8ea71cbd6c45..29c18d869ec1 100644
--- a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
@@ -2,7 +2,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
@@ -46,3 +46,9 @@ int test5(struct pt_regs *ctx)
{
return 0;
}
+
+SEC("kprobe.session/bpf_kfunc_common_test")
+int test6(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
new file mode 100644
index 000000000000..d4eef0cbadb9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+int sk_index;
+int redirect_idx;
+int trace_port;
+int helper_ret;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+ __uint(max_entries, 100);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int mptcp_sockmap_inject(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *sk;
+
+ /* only accept specified connection */
+ if (skops->local_port != trace_port ||
+ skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB)
+ return 1;
+
+ sk = skops->sk;
+ if (!sk)
+ return 1;
+
+ /* update sk handler */
+ helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST);
+
+ return 1;
+}
+
+SEC("sk_skb/stream_verdict")
+int mptcp_sockmap_redirect(struct __sk_buff *skb)
+{
+ /* redirect skb to the sk under sock_map[redirect_idx] */
+ return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_subflow.c b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
index 70302477e326..41389e579578 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_subflow.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_subflow.c
@@ -117,7 +117,7 @@ int _getsockopt_subflow(struct bpf_sockopt *ctx)
return 1;
msk = bpf_core_cast(sk, struct mptcp_sock);
- if (msk->pm.subflows != 1) {
+ if (msk->pm.extra_subflows != 1) {
ctx->retval = -1;
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/nested_acquire.c b/tools/testing/selftests/bpf/progs/nested_acquire.c
index 8e521a21d995..49ad7b9adf56 100644
--- a/tools/testing/selftests/bpf/progs/nested_acquire.c
+++ b/tools/testing/selftests/bpf/progs/nested_acquire.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/net_timestamping.c b/tools/testing/selftests/bpf/progs/net_timestamping.c
new file mode 100644
index 000000000000..b4c2f0f2be11
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/net_timestamping.c
@@ -0,0 +1,248 @@
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include <errno.h>
+
+__u32 monitored_pid = 0;
+
+int nr_active;
+int nr_snd;
+int nr_passive;
+int nr_sched;
+int nr_txsw;
+int nr_ack;
+
+struct sk_stg {
+ __u64 sendmsg_ns; /* record ts when sendmsg is called */
+};
+
+struct sk_tskey {
+ u64 cookie;
+ u32 tskey;
+};
+
+struct delay_info {
+ u64 sendmsg_ns; /* record ts when sendmsg is called */
+ u32 sched_delay; /* SCHED_CB - sendmsg_ns */
+ u32 snd_sw_delay; /* SND_SW_CB - SCHED_CB */
+ u32 ack_delay; /* ACK_CB - SND_SW_CB */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct sk_tskey);
+ __type(value, struct delay_info);
+ __uint(max_entries, 1024);
+} time_map SEC(".maps");
+
+static u64 delay_tolerance_nsec = 10000000000; /* 10 seconds as an example */
+
+extern int bpf_sock_ops_enable_tx_tstamp(struct bpf_sock_ops_kern *skops, u64 flags) __ksym;
+
+static int bpf_test_sockopt(void *ctx, const struct sock *sk, int expected)
+{
+ int tmp, new = SK_BPF_CB_TX_TIMESTAMPING;
+ int opt = SK_BPF_CB_FLAGS;
+ int level = SOL_SOCKET;
+
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)) != expected)
+ return 1;
+
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) != expected ||
+ (!expected && tmp != new))
+ return 1;
+
+ return 0;
+}
+
+static bool bpf_test_access_sockopt(void *ctx, const struct sock *sk)
+{
+ if (bpf_test_sockopt(ctx, sk, -EOPNOTSUPP))
+ return true;
+ return false;
+}
+
+static bool bpf_test_access_load_hdr_opt(struct bpf_sock_ops *skops)
+{
+ u8 opt[3] = {0};
+ int load_flags = 0;
+ int ret;
+
+ ret = bpf_load_hdr_opt(skops, opt, sizeof(opt), load_flags);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_access_cb_flags_set(struct bpf_sock_ops *skops)
+{
+ int ret;
+
+ ret = bpf_sock_ops_cb_flags_set(skops, 0);
+ if (ret != -EOPNOTSUPP)
+ return true;
+
+ return false;
+}
+
+/* In the timestamping callbacks, we're not allowed to call the following
+ * BPF calls for safety reasons. Return false if they are rejected as expected.
+ */
+static bool bpf_test_access_bpf_calls(struct bpf_sock_ops *skops,
+ const struct sock *sk)
+{
+ if (bpf_test_access_sockopt(skops, sk))
+ return true;
+
+ if (bpf_test_access_load_hdr_opt(skops))
+ return true;
+
+ if (bpf_test_access_cb_flags_set(skops))
+ return true;
+
+ return false;
+}
+
+static bool bpf_test_delay(struct bpf_sock_ops *skops, const struct sock *sk)
+{
+ struct bpf_sock_ops_kern *skops_kern;
+ u64 timestamp = bpf_ktime_get_ns();
+ struct skb_shared_info *shinfo;
+ struct delay_info dinfo = {0};
+ struct sk_tskey key = {0};
+ struct delay_info *val;
+ struct sk_buff *skb;
+ struct sk_stg *stg;
+ u64 prior_ts, delay;
+
+ if (bpf_test_access_bpf_calls(skops, sk))
+ return false;
+
+ skops_kern = bpf_cast_to_kern_ctx(skops);
+ skb = skops_kern->skb;
+ shinfo = bpf_core_cast(skb->head + skb->end, struct skb_shared_info);
+
+ key.cookie = bpf_get_socket_cookie(skops);
+ if (!key.cookie)
+ return false;
+
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_SENDMSG_CB) {
+ stg = bpf_sk_storage_get(&sk_stg_map, (void *)sk, 0, 0);
+ if (!stg)
+ return false;
+ dinfo.sendmsg_ns = stg->sendmsg_ns;
+ bpf_sock_ops_enable_tx_tstamp(skops_kern, 0);
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+ bpf_map_update_elem(&time_map, &key, &dinfo, BPF_ANY);
+ return true;
+ }
+
+ key.tskey = shinfo->tskey;
+ if (!key.tskey)
+ return false;
+
+ val = bpf_map_lookup_elem(&time_map, &key);
+ if (!val)
+ return false;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ val->sched_delay = timestamp - val->sendmsg_ns;
+ delay = val->sched_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ prior_ts = val->sched_delay + val->sendmsg_ns;
+ val->snd_sw_delay = timestamp - prior_ts;
+ delay = val->snd_sw_delay;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ prior_ts = val->snd_sw_delay + val->sched_delay + val->sendmsg_ns;
+ val->ack_delay = timestamp - prior_ts;
+ delay = val->ack_delay;
+ break;
+ }
+
+ if (delay >= delay_tolerance_nsec)
+ return false;
+
+ /* Since it's the last one, remove from the map after latency check */
+ if (skops->op == BPF_SOCK_OPS_TSTAMP_ACK_CB)
+ bpf_map_delete_elem(&time_map, &key);
+
+ return true;
+}
+
+SEC("fentry/tcp_sendmsg_locked")
+int BPF_PROG(trace_tcp_sendmsg_locked, struct sock *sk, struct msghdr *msg,
+ size_t size)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ u64 timestamp = bpf_ktime_get_ns();
+ u32 flag = sk->sk_bpf_cb_flags;
+ struct sk_stg *stg;
+
+ if (pid != monitored_pid || !flag)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!stg)
+ return 0;
+
+ stg->sendmsg_ns = timestamp;
+ nr_snd += 1;
+ return 0;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *bpf_sk = skops->sk;
+ const struct sock *sk;
+
+ if (!bpf_sk)
+ return 1;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+ if (!sk)
+ return 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ nr_active += !bpf_test_sockopt(skops, sk, 0);
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SENDMSG_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_snd += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SCHED_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_sched += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_SND_SW_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_txsw += 1;
+ break;
+ case BPF_SOCK_OPS_TSTAMP_ACK_CB:
+ if (bpf_test_delay(skops, sk))
+ nr_ack += 1;
+ break;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
index c6edf8dbefeb..94040714af18 100644
--- a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -28,6 +28,7 @@ struct {
} sock_map SEC(".maps");
int tcx_init_netns_cookie, tcx_netns_cookie;
+int cgroup_skb_init_netns_cookie, cgroup_skb_netns_cookie;
SEC("sockops")
int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
@@ -91,4 +92,12 @@ int get_netns_cookie_tcx(struct __sk_buff *skb)
return TCX_PASS;
}
+SEC("cgroup_skb/ingress")
+int get_netns_cookie_cgroup_skb(struct __sk_buff *skb)
+{
+ cgroup_skb_init_netns_cookie = bpf_get_netns_cookie(NULL);
+ cgroup_skb_netns_cookie = bpf_get_netns_cookie(skb);
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
index 885377e83607..7d04254e61f1 100644
--- a/tools/testing/selftests/bpf/progs/preempt_lock.c
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -5,8 +5,10 @@
#include "bpf_misc.h"
#include "bpf_experimental.h"
+extern int bpf_copy_from_user_str(void *dst, u32 dst__sz, const void *unsafe_ptr__ign, u64 flags) __weak __ksym;
+
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_1(struct __sk_buff *ctx)
{
bpf_preempt_disable();
@@ -14,7 +16,7 @@ int preempt_lock_missing_1(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_2(struct __sk_buff *ctx)
{
bpf_preempt_disable();
@@ -23,7 +25,7 @@ int preempt_lock_missing_2(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_3(struct __sk_buff *ctx)
{
bpf_preempt_disable();
@@ -33,7 +35,7 @@ int preempt_lock_missing_3(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx)
{
bpf_preempt_disable();
@@ -55,7 +57,7 @@ static __noinline void preempt_enable(void)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
{
preempt_disable();
@@ -63,7 +65,7 @@ int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
{
preempt_disable();
@@ -72,7 +74,7 @@ int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
}
SEC("?tc")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_preempt_disable-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_preempt_disable-ed region")
int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx)
{
preempt_disable();
@@ -113,6 +115,18 @@ int preempt_sleepable_helper(void *ctx)
return 0;
}
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("kernel func bpf_copy_from_user_str is sleepable within non-preemptible region")
+int preempt_sleepable_kfunc(void *ctx)
+{
+ u32 data;
+
+ bpf_preempt_disable();
+ bpf_copy_from_user_str(&data, sizeof(data), NULL, 0);
+ bpf_preempt_enable();
+ return 0;
+}
+
int __noinline preempt_global_subprog(void)
{
preempt_balance_subprog();
@@ -120,7 +134,7 @@ int __noinline preempt_global_subprog(void)
}
SEC("?tc")
-__failure __msg("global function calls are not allowed with preemption disabled")
+__success
int preempt_global_subprog_test(struct __sk_buff *ctx)
{
preempt_disable();
@@ -129,4 +143,70 @@ int preempt_global_subprog_test(struct __sk_buff *ctx)
return 0;
}
+int __noinline
+global_subprog(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_helper_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_sleepable_kfunc_subprog(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("global functions that may sleep are not allowed in non-sleepable context")
+int preempt_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ if (ctx->mark)
+ global_subprog_calling_sleepable_global(ctx->mark);
+ preempt_enable();
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/prepare.c b/tools/testing/selftests/bpf/progs/prepare.c
new file mode 100644
index 000000000000..cfc1f48e0d28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/prepare.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+int err;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("cgroup_skb/egress")
+int program(struct __sk_buff *skb)
+{
+ err = 0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_freplace_prog.c b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
new file mode 100644
index 000000000000..ccf1b04010ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_freplace_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("freplace/xdp_prog1")
+int new_xdp_prog2(struct xdp_md *xd)
+{
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
index 3c7b2b618c8a..725e29595079 100644
--- a/tools/testing/selftests/bpf/progs/priv_prog.c
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -6,8 +6,8 @@
char _license[] SEC("license") = "GPL";
-SEC("kprobe")
-int kprobe_prog(void *ctx)
+SEC("xdp")
+int xdp_prog1(struct xdp_md *xdp)
{
- return 1;
+ return XDP_DROP;
}
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue.c b/tools/testing/selftests/bpf/progs/pro_epilogue.c
index 44bc3f06b4b6..d97d6e07ef5c 100644
--- a/tools/testing/selftests/bpf/progs/pro_epilogue.c
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue.c
@@ -4,8 +4,8 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
index 3529e53be355..6048d79be48b 100644
--- a/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_goto_start.c
@@ -4,8 +4,8 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
new file mode 100644
index 000000000000..a5a8f08ac8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pro_epilogue_with_kfunc.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void __kfunc_btf_root(void)
+{
+ bpf_kfunc_st_ops_inc10(NULL);
+}
+
+static __noinline __used int subprog(struct st_ops_args *args)
+{
+ args->a += 1;
+ return args->a;
+}
+
+__success
+/* prologue */
+__xlated("0: r8 = r1")
+__xlated("1: r1 = 0")
+__xlated("2: call kernel-function")
+__xlated("3: if r0 != 0x0 goto pc+5")
+__xlated("4: r6 = *(u64 *)(r8 +0)")
+__xlated("5: r7 = *(u64 *)(r6 +0)")
+__xlated("6: r7 += 1000")
+__xlated("7: *(u64 *)(r6 +0) = r7")
+__xlated("8: goto pc+2")
+__xlated("9: r1 = r0")
+__xlated("10: call kernel-function")
+__xlated("11: r1 = r8")
+/* save __u64 *ctx to stack */
+__xlated("12: *(u64 *)(r10 -8) = r1")
+/* main prog */
+__xlated("13: r1 = *(u64 *)(r1 +0)")
+__xlated("14: r6 = r1")
+__xlated("15: call kernel-function")
+__xlated("16: r1 = r6")
+__xlated("17: call pc+")
+/* epilogue */
+__xlated("18: r1 = 0")
+__xlated("19: r6 = 0")
+__xlated("20: call kernel-function")
+__xlated("21: if r0 != 0x0 goto pc+6")
+__xlated("22: r1 = *(u64 *)(r10 -8)")
+__xlated("23: r1 = *(u64 *)(r1 +0)")
+__xlated("24: r6 = *(u64 *)(r1 +0)")
+__xlated("25: r6 += 10000")
+__xlated("26: *(u64 *)(r1 +0) = r6")
+__xlated("27: goto pc+2")
+__xlated("28: r1 = r0")
+__xlated("29: call kernel-function")
+__xlated("30: r0 = r6")
+__xlated("31: r0 *= 2")
+__xlated("32: exit")
+SEC("struct_ops/test_pro_epilogue")
+__naked int test_kfunc_pro_epilogue(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r1 +0);"
+ "r6 = r1;"
+ "call %[bpf_kfunc_st_ops_inc10];"
+ "r1 = r6;"
+ "call subprog;"
+ "exit;"
+ :
+ : __imm(bpf_kfunc_st_ops_inc10)
+ : __clobber_all);
+}
+
+SEC("syscall")
+__retval(22022) /* (PROLOGUE_A [1000] + KFUNC_INC10 + SUBPROG_A [1] + EPILOGUE_A [10000]) * 2 */
+int syscall_pro_epilogue(void *ctx)
+{
+ struct st_ops_args args = {};
+
+ return bpf_kfunc_st_ops_test_pro_epilogue(&args);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_st_ops pro_epilogue_with_kfunc = {
+ .test_pro_epilogue = (void *)test_kfunc_pro_epilogue,
+};
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 8bd1ebd7d6af..813143b4985d 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -223,7 +223,7 @@ static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
payload += filepart_length;
}
- cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ cgroup_node = BPF_CORE_READ(cgroup_node, __parent);
}
return payload;
}
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null.c b/tools/testing/selftests/bpf/progs/raw_tp_null.c
index 457f34c151e3..efa416f53968 100644
--- a/tools/testing/selftests/bpf/progs/raw_tp_null.c
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null.c
@@ -3,13 +3,14 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
int tid;
int i;
-SEC("tp_btf/bpf_testmod_test_raw_tp_null")
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
{
struct task_struct *task = bpf_get_current_task_btf();
@@ -17,16 +18,14 @@ int BPF_PROG(test_raw_tp_null, struct sk_buff *skb)
if (task->pid != tid)
return 0;
- i = i + skb->mark + 1;
- /* The compiler may move the NULL check before this deref, which causes
- * the load to fail as deref of scalar. Prevent that by using a barrier.
+ /* If dead code elimination kicks in, the increment +=2 will be
+ * removed. For raw_tp programs attaching to tracepoints in kernel
+ * modules, we mark input arguments as PTR_MAYBE_NULL, so branch
+ * prediction should never kick in.
*/
- barrier();
- /* If dead code elimination kicks in, the increment below will
- * be removed. For raw_tp programs, we mark input arguments as
- * PTR_MAYBE_NULL, so branch prediction should never kick in.
- */
- if (!skb)
- i += 2;
+ asm volatile ("%[i] += 1; if %[ctx] != 0 goto +1; %[i] += 2;"
+ : [i]"+r"(i)
+ : [ctx]"r"(skb)
+ : "memory");
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
new file mode 100644
index 000000000000..0d58114a4955
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/raw_tp_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Ensure module parameter has PTR_MAYBE_NULL */
+SEC("tp_btf/bpf_testmod_test_raw_tp_null_tp")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int test_raw_tp_null_bpf_testmod_test_raw_tp_null_arg_1(void *ctx) {
+ asm volatile("r1 = *(u64 *)(r1 +0); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
+ return 0;
+}
+
+/* Check NULL marking */
+SEC("tp_btf/sched_pi_setprio")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
+int test_raw_tp_null_sched_pi_setprio_arg_2(void *ctx) {
+ asm volatile("r1 = *(u64 *)(r1 +8); r1 = *(u64 *)(r1 +0);" ::: __clobber_all);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index a3620c15c136..49fe93d7e059 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -61,19 +61,19 @@ static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
}
m->key = 1;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_rbtree_add(&groot, &m->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_rbtree_add(root, &m->node, less);
+ bpf_spin_unlock(lock);
n = bpf_obj_new(typeof(*n));
if (!n)
return 3;
n->key = 3;
- bpf_spin_lock(&glock);
- bpf_rbtree_add(&groot, &n->node, less);
- bpf_spin_unlock(&glock);
+ bpf_spin_lock(lock);
+ bpf_rbtree_add(root, &n->node, less);
+ bpf_spin_unlock(lock);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index dbd5eee8e25e..4acb6af2dfe3 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -69,11 +69,11 @@ long rbtree_api_nolock_first(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__retval(0)
long rbtree_api_remove_unadded_node(void *ctx)
{
struct node_data *n, *m;
- struct bpf_rb_node *res;
+ struct bpf_rb_node *res_n, *res_m;
n = bpf_obj_new(typeof(*n));
if (!n)
@@ -88,19 +88,20 @@ long rbtree_api_remove_unadded_node(void *ctx)
bpf_spin_lock(&glock);
bpf_rbtree_add(&groot, &n->node, less);
- /* This remove should pass verifier */
- res = bpf_rbtree_remove(&groot, &n->node);
- n = container_of(res, struct node_data, node);
+ res_n = bpf_rbtree_remove(&groot, &n->node);
- /* This remove shouldn't, m isn't in an rbtree */
- res = bpf_rbtree_remove(&groot, &m->node);
- m = container_of(res, struct node_data, node);
+ res_m = bpf_rbtree_remove(&groot, &m->node);
bpf_spin_unlock(&glock);
- if (n)
- bpf_obj_drop(n);
- if (m)
- bpf_obj_drop(m);
+ bpf_obj_drop(m);
+ if (res_n)
+ bpf_obj_drop(container_of(res_n, struct node_data, node));
+ if (res_m) {
+ bpf_obj_drop(container_of(res_m, struct node_data, node));
+ /* m was not added to the rbtree */
+ return 2;
+ }
+
return 0;
}
@@ -178,7 +179,7 @@ err_out:
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_add_release_unlock_escape(void *ctx)
{
struct node_data *n;
@@ -202,7 +203,7 @@ long rbtree_api_add_release_unlock_escape(void *ctx)
}
SEC("?tc")
-__failure __msg("rbtree_remove node input must be non-owning ref")
+__failure __msg("bpf_rbtree_remove can only take non-owning or refcounted bpf_rb_node pointer")
long rbtree_api_first_release_unlock_escape(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_search.c b/tools/testing/selftests/bpf/progs/rbtree_search.c
new file mode 100644
index 000000000000..b05565d1db0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_search.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ struct bpf_refcount ref;
+ struct bpf_rb_node r0;
+ struct bpf_rb_node r1;
+ int key0;
+ int key1;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock0;
+private(A) struct bpf_rb_root groot0 __contains(node_data, r0);
+
+private(B) struct bpf_spin_lock glock1;
+private(B) struct bpf_rb_root groot1 __contains(node_data, r1);
+
+#define rb_entry(ptr, type, member) container_of(ptr, type, member)
+#define NR_NODES 16
+
+int zero = 0;
+
+static bool less0(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r0);
+ node_b = rb_entry(b, struct node_data, r0);
+
+ return node_a->key0 < node_b->key0;
+}
+
+static bool less1(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = rb_entry(a, struct node_data, r1);
+ node_b = rb_entry(b, struct node_data, r1);
+
+ return node_a->key1 < node_b->key1;
+}
+
+SEC("syscall")
+__retval(0)
+long rbtree_search(void *ctx)
+{
+ struct bpf_rb_node *rb_n, *rb_m, *gc_ns[NR_NODES];
+ long lookup_key = NR_NODES / 2;
+ struct node_data *n, *m;
+ int i, nr_gc = 0;
+
+ for (i = zero; i < NR_NODES && can_loop; i++) {
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return __LINE__;
+
+ m = bpf_refcount_acquire(n);
+
+ n->key0 = i;
+ m->key1 = i;
+
+ bpf_spin_lock(&glock0);
+ bpf_rbtree_add(&groot0, &n->r0, less0);
+ bpf_spin_unlock(&glock0);
+
+ bpf_spin_lock(&glock1);
+ bpf_rbtree_add(&groot1, &m->r1, less1);
+ bpf_spin_unlock(&glock1);
+ }
+
+ n = NULL;
+ bpf_spin_lock(&glock0);
+ rb_n = bpf_rbtree_root(&groot0);
+ while (can_loop) {
+ if (!rb_n) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ n = rb_entry(rb_n, struct node_data, r0);
+ if (lookup_key == n->key0)
+ break;
+ if (nr_gc < NR_NODES)
+ gc_ns[nr_gc++] = rb_n;
+ if (lookup_key < n->key0)
+ rb_n = bpf_rbtree_left(&groot0, rb_n);
+ else
+ rb_n = bpf_rbtree_right(&groot0, rb_n);
+ }
+
+ if (!n || lookup_key != n->key0) {
+ bpf_spin_unlock(&glock0);
+ return __LINE__;
+ }
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ gc_ns[i] = bpf_rbtree_remove(&groot0, rb_n);
+ }
+
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock0);
+
+ for (i = 0; i < nr_gc; i++) {
+ rb_n = gc_ns[i];
+ if (rb_n) {
+ n = rb_entry(rb_n, struct node_data, r0);
+ bpf_obj_drop(n);
+ }
+ }
+
+ if (!m)
+ return __LINE__;
+
+ bpf_spin_lock(&glock1);
+ rb_m = bpf_rbtree_remove(&groot1, &m->r1);
+ bpf_spin_unlock(&glock1);
+ bpf_obj_drop(m);
+ if (!rb_m)
+ return __LINE__;
+ bpf_obj_drop(rb_entry(rb_m, struct node_data, r1));
+
+ return 0;
+}
+
+#define TEST_ROOT(dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_root_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ __u64 jiffies = 0; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+#define TEST_LR(op, dolock) \
+SEC("syscall") \
+__failure __msg(MSG) \
+long test_##op##_spinlock_##dolock(void *ctx) \
+{ \
+ struct bpf_rb_node *rb_n; \
+ struct node_data *n; \
+ __u64 jiffies = 0; \
+ \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_root(&groot0); \
+ if (!rb_n) { \
+ bpf_spin_unlock(&glock0); \
+ return 1; \
+ } \
+ n = rb_entry(rb_n, struct node_data, r0); \
+ n = bpf_refcount_acquire(n); \
+ bpf_spin_unlock(&glock0); \
+ if (!n) \
+ return 1; \
+ \
+ if (dolock) \
+ bpf_spin_lock(&glock0); \
+ rb_n = bpf_rbtree_##op(&groot0, &n->r0); \
+ if (rb_n) \
+ jiffies = bpf_jiffies64(); \
+ if (dolock) \
+ bpf_spin_unlock(&glock0); \
+ \
+ return !!jiffies; \
+}
+
+/*
+ * Use a separate MSG macro instead of passing to TEST_XXX(..., MSG)
+ * to ensure the message itself is not in the bpf prog lineinfo
+ * which the verifier includes in its log.
+ * Otherwise, the test_loader will incorrectly match the prog lineinfo
+ * instead of the log generated by the verifier.
+ */
+#define MSG "call bpf_rbtree_root{{.+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_ROOT(true)
+#undef MSG
+#define MSG "call bpf_rbtree_{{(left|right).+}}; R0{{(_w)?}}=rcu_ptr_or_null_node_data(id={{[0-9]+}},non_own_ref"
+TEST_LR(left, true)
+TEST_LR(right, true)
+#undef MSG
+
+#define MSG "bpf_spin_lock at off=0 must be held for bpf_rb_root"
+TEST_ROOT(false)
+TEST_LR(left, false)
+TEST_LR(right, false)
+#undef MSG
+
+char _license[] SEC("license") = "GPL";
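
The search loop above descends the tree with bpf_rbtree_root()/bpf_rbtree_left()/bpf_rbtree_right() while glock0 is held, remembering every visited node so it can be removed afterwards. Stripped of that bookkeeping, the descent reduces to the sketch below (illustrative only; the non-owning references returned by these kfuncs are valid only while the lock is held, so the whole walk stays under the lock exactly as the test does):

	/* glock0 held by the surrounding code */
	rb_n = bpf_rbtree_root(&groot0);
	while (rb_n && can_loop) {
		n = rb_entry(rb_n, struct node_data, r0);
		if (n->key0 == lookup_key)
			break;		/* found */
		rb_n = lookup_key < n->key0 ?
		       bpf_rbtree_left(&groot0, rb_n) :
		       bpf_rbtree_right(&groot0, rb_n);
	}
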
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index ab3a532b7dd6..d70c28824bbe 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -16,10 +16,11 @@ struct {
__type(value, long);
} map_a SEC(".maps");
-__u32 user_data, key_serial, target_pid;
+__u32 user_data, target_pid;
+__s32 key_serial;
__u64 flags, task_storage_val, cgroup_id;
-struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
@@ -242,7 +243,8 @@ out:
}
SEC("?lsm.s/bpf")
-int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size,
+ bool kernel)
{
struct bpf_key *bkey;
@@ -276,6 +278,46 @@ out:
return 0;
}
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_1(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region_unbalanced_2(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
int task_trusted_non_rcuptr(void *ctx)
{
@@ -439,3 +481,61 @@ int rcu_read_lock_global_subprog_unlock(void *ctx)
ret += global_subprog_unlock(ret);
return 0;
}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_helper_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_helper_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_kfunc_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_sleepable_kfunc_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_sleepable_global_subprog_indirect(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_calling_sleepable_global(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
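
The two nested_rcu_region_unbalanced programs added above are deliberately unbalanced (three unlocks for two locks, and three locks for two unlocks). For contrast, a balanced version of the same nested region pairs every bpf_rcu_read_lock() with exactly one bpf_rcu_read_unlock(); a minimal sketch of what that looks like (illustrative, not one of the added tests):

SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
int nested_rcu_region_balanced_sketch(void *ctx)
{
	struct task_struct *task, *real_parent;

	task = bpf_get_current_task_btf();
	bpf_rcu_read_lock();
	bpf_rcu_read_lock();
	real_parent = task->real_parent;
	if (real_parent)
		(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
	bpf_rcu_read_unlock();
	bpf_rcu_read_unlock();
	return 0;
}
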
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
index 76556e0b42b2..69da05bb6c63 100644
--- a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-extern bool CONFIG_PREEMPT __kconfig __weak;
+extern bool CONFIG_PREEMPTION __kconfig __weak;
extern const int bpf_task_storage_busy __ksym;
char _license[] SEC("license") = "GPL";
@@ -24,7 +24,7 @@ int BPF_PROG(read_bpf_task_storage_busy)
{
int *value;
- if (!CONFIG_PREEMPT)
+ if (!CONFIG_PREEMPTION)
return 0;
if (bpf_get_current_pid_tgid() >> 32 != pid)
diff --git a/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
new file mode 100644
index 000000000000..405adbe5e8b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_cgroupfs_xattr.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid = 0;
+
+char xattr_value[64];
+static const char expected_value_a[] = "bpf_selftest_value_a";
+static const char expected_value_b[] = "bpf_selftest_value_b";
+bool found_value_a;
+bool found_value_b;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup_subsys_state *css, *tmp;
+ struct bpf_dynptr value_ptr;
+ struct cgroup *cgrp;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ cgrp = bpf_cgroup_from_id(cgrp_id);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ css = &cgrp->self;
+ bpf_dynptr_from_mem(xattr_value, sizeof(xattr_value), 0, &value_ptr);
+ bpf_for_each(css, tmp, css, BPF_CGROUP_ITER_ANCESTORS_UP) {
+ int ret;
+
+ ret = bpf_cgroup_read_xattr(tmp->cgroup, "user.bpf_test",
+ &value_ptr);
+ if (ret < 0)
+ continue;
+
+ if (ret == sizeof(expected_value_a) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_a), expected_value_a))
+ found_value_a = true;
+ if (ret == sizeof(expected_value_b) &&
+ !bpf_strncmp(xattr_value, sizeof(expected_value_b), expected_value_b))
+ found_value_b = true;
+ }
+
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
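
The program walks the task's cgroup ancestors with bpf_for_each() and reads the user.bpf_test xattr of each level into the dynptr backed by xattr_value, comparing the returned length (which includes the NUL terminator) against the two expected strings. For that to match anything, the xattrs have to be set on the cgroup directories beforehand; a rough userspace sketch of that setup, with purely hypothetical cgroup paths (the real test creates its own hierarchy):

#include <sys/xattr.h>

static int setup_cgroup_xattrs(void)
{
	/* Assumed example paths, not the ones used by the test. */
	const char *parent = "/sys/fs/cgroup/xattr_test";
	const char *child  = "/sys/fs/cgroup/xattr_test/leaf";

	/* Sizes include the trailing NUL to match the equality check above. */
	if (setxattr(parent, "user.bpf_test", "bpf_selftest_value_a",
		     sizeof("bpf_selftest_value_a"), 0))
		return -1;
	if (setxattr(child, "user.bpf_test", "bpf_selftest_value_b",
		     sizeof("bpf_selftest_value_b"), 0))
		return -1;
	return 0;
}
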
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
index 39ebef430059..395591374d4f 100644
--- a/tools/testing/selftests/bpf/progs/read_vsyscall.c
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -8,14 +8,16 @@
int target_pid = 0;
void *user_ptr = 0;
-int read_ret[9];
+int read_ret[10];
char _license[] SEC("license") = "GPL";
/*
- * This is the only kfunc, the others are helpers
+ * These are the kfuncs, the others are helpers
*/
int bpf_copy_from_user_str(void *dst, u32, const void *, u64) __weak __ksym;
+int bpf_copy_from_user_task_str(void *dst, u32, const void *,
+ struct task_struct *, u64) __weak __ksym;
SEC("fentry/" SYS_PREFIX "sys_nanosleep")
int do_probe_read(void *ctx)
@@ -47,6 +49,11 @@ int do_copy_from_user(void *ctx)
read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
bpf_get_current_task_btf(), 0);
read_ret[8] = bpf_copy_from_user_str((char *)buf, sizeof(buf), user_ptr, 0);
+ read_ret[9] = bpf_copy_from_user_task_str((char *)buf,
+ sizeof(buf),
+ user_ptr,
+ bpf_get_current_task_btf(),
+ 0);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
index 893a4fdb4b6e..1aca85d86aeb 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -568,4 +568,64 @@ err_out:
return 0;
}
+private(kptr_ref) u64 ref;
+
+static int probe_read_refcount(void)
+{
+ u32 refcount;
+
+ bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
+ return refcount;
+}
+
+static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct node_data __kptr **node)
+{
+ struct node_data *node_new, *node_ref, *node_old;
+
+ node_new = bpf_obj_new(typeof(*node_new));
+ if (!node_new)
+ return -1;
+
+ node_ref = bpf_refcount_acquire(node_new);
+ node_old = bpf_kptr_xchg(node, node_new);
+ if (node_old) {
+ bpf_obj_drop(node_old);
+ bpf_obj_drop(node_ref);
+ return -2;
+ }
+
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &node_ref->l);
+ ref = (u64)(void *) &node_ref->ref;
+ bpf_spin_unlock(lock);
+ return probe_read_refcount();
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} percpu_hash SEC(".maps");
+
+SEC("tc")
+int percpu_hash_refcount_leak(void *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&percpu_hash, &key);
+ if (!v)
+ return 0;
+
+ return __insert_in_list(&head, &lock, &v->node);
+}
+
+SEC("tc")
+int check_percpu_hash_refcount(void *ctx)
+{
+ return probe_read_refcount();
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock.c b/tools/testing/selftests/bpf/progs/res_spin_lock.c
new file mode 100644
index 000000000000..22c4fb8b9266
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define EDEADLK 35
+#define ETIMEDOUT 110
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct bpf_res_spin_lock lockA __hidden SEC(".data.A");
+struct bpf_res_spin_lock lockB __hidden SEC(".data.B");
+
+SEC("tc")
+int res_spin_lock_test(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem1, *elem2;
+ int r;
+
+ elem1 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem1)
+ return -1;
+ elem2 = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem2)
+ return -1;
+
+ r = bpf_res_spin_lock(&elem1->lock);
+ if (r)
+ return r;
+ r = bpf_res_spin_lock(&elem2->lock);
+ if (!r) {
+ bpf_res_spin_unlock(&elem2->lock);
+ bpf_res_spin_unlock(&elem1->lock);
+ return -1;
+ }
+ bpf_res_spin_unlock(&elem1->lock);
+ return r != -EDEADLK;
+}
+
+SEC("tc")
+int res_spin_lock_test_AB(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockA);
+ if (r)
+ return !r;
+ /* Only unlock if we took the lock. */
+ if (!bpf_res_spin_lock(&lockB))
+ bpf_res_spin_unlock(&lockB);
+ bpf_res_spin_unlock(&lockA);
+ return 0;
+}
+
+int err;
+
+SEC("tc")
+int res_spin_lock_test_BA(struct __sk_buff *ctx)
+{
+ int r;
+
+ r = bpf_res_spin_lock(&lockB);
+ if (r)
+ return !r;
+ if (!bpf_res_spin_lock(&lockA))
+ bpf_res_spin_unlock(&lockA);
+ else
+ err = -EDEADLK;
+ bpf_res_spin_unlock(&lockB);
+ return err ?: 0;
+}
+
+SEC("tc")
+int res_spin_lock_test_held_lock_max(struct __sk_buff *ctx)
+{
+ struct bpf_res_spin_lock *locks[48] = {};
+ struct arr_elem *e;
+ u64 time_beg, time;
+ int ret = 0, i;
+
+ _Static_assert(ARRAY_SIZE(((struct rqspinlock_held){}).locks) == 31,
+ "RES_NR_HELD assumed to be 31");
+
+ for (i = 0; i < 34; i++) {
+ int key = i;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+		 * lose bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ for (; i < 48; i++) {
+ int key = i - 2;
+
+ /* We cannot pass in i as it will get spilled/filled by the compiler and
+		 * lose bounds in verifier state.
+ */
+ e = bpf_map_lookup_elem(&arrmap, &key);
+ if (!e)
+ return 1;
+ locks[i] = &e->lock;
+ }
+
+ time_beg = bpf_ktime_get_ns();
+ for (i = 0; i < 34; i++) {
+ if (bpf_res_spin_lock(locks[i]))
+ goto end;
+ }
+
+ /* Trigger AA, after exhausting entries in the held lock table. This
+ * time, only the timeout can save us, as AA detection won't succeed.
+ */
+ ret = bpf_res_spin_lock(locks[34]);
+ if (!ret) {
+ bpf_res_spin_unlock(locks[34]);
+ ret = 1;
+ goto end;
+ }
+
+ ret = ret != -ETIMEDOUT ? 2 : 0;
+
+end:
+ for (i = i - 1; i >= 0; i--)
+ bpf_res_spin_unlock(locks[i]);
+ time = bpf_ktime_get_ns() - time_beg;
+ /* Time spent should be easily above our limit (1/4 s), since AA
+	 * detection won't be expedited due to the lack of a held lock entry.
+ */
+ return ret ?: (time > 1000000000 / 4 ? 0 : 1);
+}
+
+char _license[] SEC("license") = "GPL";
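
Unlike bpf_spin_lock(), bpf_res_spin_lock() can fail (-EDEADLK when an AA/ABBA cycle is detected, -ETIMEDOUT as the last-resort fallback), so its return value must always be checked and only a successfully taken lock may be unlocked, which is the pattern every program above follows. Condensed to its minimum, reusing the arrmap element defined in this file (a sketch, not an additional test):

SEC("tc")
int res_spin_lock_pattern_sketch(struct __sk_buff *ctx)
{
	struct arr_elem *elem;
	int ret;

	elem = bpf_map_lookup_elem(&arrmap, &(int){0});
	if (!elem)
		return 0;

	ret = bpf_res_spin_lock(&elem->lock);
	if (ret)			/* lock not taken: must not unlock */
		return ret;

	/* ... critical section ... */

	bpf_res_spin_unlock(&elem->lock);
	return 0;
}
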
diff --git a/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
new file mode 100644
index 000000000000..330682a88c16
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/res_spin_lock_fail.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024-2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+long value;
+
+struct bpf_spin_lock lock __hidden SEC(".data.A");
+struct bpf_res_spin_lock res_lock __hidden SEC(".data.B");
+
+SEC("?tc")
+__failure __msg("point to map value or allocated object")
+int res_spin_lock_arg(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((struct bpf_res_spin_lock *)bpf_core_cast(&elem->lock, struct __sk_buff));
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("AA deadlock detected")
+int res_spin_lock_cond_AA(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_lock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock(&elem->lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_1(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_local_irq_save(&f1);
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("unlock of different lock")
+int res_spin_lock_irq_mismatch_2(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock(&res_lock))
+ return 0;
+ if (bpf_res_spin_lock(&elem->lock)) {
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(&elem->lock);
+ bpf_res_spin_unlock(&res_lock);
+ return 0;
+}
+
+SEC("?tc")
+__success
+int res_spin_lock_ooo_irq(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ unsigned long f1, f2;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&res_lock, &f1))
+ return 0;
+ if (bpf_res_spin_lock_irqsave(&elem->lock, &f2)) {
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+		/* We won't have an unreleased IRQ flag error here. */
+ return 0;
+ }
+ bpf_res_spin_unlock_irqrestore(&elem->lock, &f2);
+ bpf_res_spin_unlock_irqrestore(&res_lock, &f1);
+ return 0;
+}
+
+struct bpf_res_spin_lock lock1 __hidden SEC(".data.OO1");
+struct bpf_res_spin_lock lock2 __hidden SEC(".data.OO2");
+
+SEC("?tc")
+__failure __msg("bpf_res_spin_unlock cannot be out of order")
+int res_spin_lock_ooo_unlock(struct __sk_buff *ctx)
+{
+ if (bpf_res_spin_lock(&lock1))
+ return 0;
+ if (bpf_res_spin_lock(&lock2)) {
+ bpf_res_spin_unlock(&lock1);
+ return 0;
+ }
+ bpf_res_spin_unlock(&lock1);
+ bpf_res_spin_unlock(&lock2);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("off 1 doesn't point to 'struct bpf_res_spin_lock' that is at 0")
+int res_spin_lock_bad_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem)
+ return 0;
+ bpf_res_spin_lock((void *)&elem->lock + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. bpf_res_spin_lock has to be at the constant offset")
+int res_spin_lock_var_off(struct __sk_buff *ctx)
+{
+ struct arr_elem *elem;
+ u64 val = value;
+
+ elem = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!elem) {
+ // FIXME: Only inline assembly use in assert macro doesn't emit
+ // BTF definition.
+ bpf_throw(0);
+ return 0;
+ }
+ bpf_assert_range(val, 0, 40);
+ bpf_res_spin_lock((void *)&value + val);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("map 'res_spin.bss' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_map(struct __sk_buff *ctx)
+{
+ bpf_res_spin_lock((void *)&value + 1);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("local 'kptr' has no valid bpf_res_spin_lock")
+int res_spin_lock_no_lock_kptr(struct __sk_buff *ctx)
+{
+ struct { int i; } *p = bpf_obj_new(typeof(*p));
+
+ if (!p)
+ return 0;
+ bpf_res_spin_lock((void *)p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 6a468496f539..d96c7d1e8fc2 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
+#include <stdbool.h>
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
@@ -14,9 +15,11 @@ struct {
const volatile int batch_cnt = 0;
const volatile long use_output = 0;
+const volatile bool bench_producer = false;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
+long hits __attribute__((aligned(128))) = 0;
const volatile long wakeup_data_size = 0;
@@ -24,6 +27,9 @@ static __always_inline long get_flags()
{
long sz;
+ if (bench_producer)
+ return BPF_RB_NO_WAKEUP;
+
if (!wakeup_data_size)
return 0;
@@ -47,6 +53,8 @@ int bench_ringbuf(void *ctx)
*sample = sample_val;
flags = get_flags();
bpf_ringbuf_submit(sample, flags);
+ if (bench_producer)
+ __sync_add_and_fetch(&hits, 1);
}
}
} else {
@@ -55,6 +63,9 @@ int bench_ringbuf(void *ctx)
if (bpf_ringbuf_output(&ringbuf, &sample_val,
sizeof(sample_val), flags))
__sync_add_and_fetch(&dropped, 1);
+ else if (bench_producer)
+ __sync_add_and_fetch(&hits, 1);
+
}
}
return 0;
diff --git a/tools/testing/selftests/bpf/progs/security_bpf_map.c b/tools/testing/selftests/bpf/progs/security_bpf_map.c
new file mode 100644
index 000000000000..7216b3450e96
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/security_bpf_map.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define EPERM 1 /* Operation not permitted */
+
+/* From include/linux/mm.h. */
+#define FMODE_WRITE 0x2
+
+struct map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} prot_status_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} prot_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} not_prot_map SEC(".maps");
+
+SEC("fmod_ret/security_bpf_map")
+int BPF_PROG(fmod_bpf_map, struct bpf_map *map, int fmode)
+{
+ __u32 key = 0;
+ __u32 *status_ptr = bpf_map_lookup_elem(&prot_status_map, &key);
+
+ if (!status_ptr || !*status_ptr)
+ return 0;
+
+ if (map == &prot_map) {
+ /* Allow read-only access */
+ if (fmode & FMODE_WRITE)
+ return -EPERM;
+ }
+
+ return 0;
+}
+
+/*
+ * This program keeps references to maps. This is needed to prevent
+ * optimizing them out.
+ */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_dummy1, int a)
+{
+ __u32 key = 0;
+ __u32 val1 = a;
+ __u32 val2 = a + 1;
+
+ bpf_map_update_elem(&prot_map, &key, &val1, BPF_ANY);
+ bpf_map_update_elem(&not_prot_map, &key, &val2, BPF_ANY);
+ return 0;
+}
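
The fmod_ret hook above only denies FMODE_WRITE access to prot_map once prot_status_map[0] is non-zero; read-only access, and any access to not_prot_map, stays allowed. The user-space half of the test presumably toggles that flag and then checks which access modes it can obtain; a rough, hypothetical sketch of such a check (not the actual test code; it assumes the protection is exercised by re-opening the map by id):

#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>

static int check_prot_map(int prot_map_fd)
{
	LIBBPF_OPTS(bpf_get_fd_by_id_opts, ro_opts, .open_flags = BPF_F_RDONLY);
	struct bpf_map_info info = {};
	__u32 len = sizeof(info);
	int fd;

	if (bpf_obj_get_info_by_fd(prot_map_fd, &info, &len))
		return -1;

	fd = bpf_map_get_fd_by_id(info.id);	/* read-write: expect denial */
	if (fd >= 0) {
		close(fd);
		return -1;
	}

	fd = bpf_map_get_fd_by_id_opts(info.id, &ro_opts); /* read-only: expect success */
	if (fd < 0)
		return -1;
	close(fd);
	return 0;
}
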
diff --git a/tools/testing/selftests/bpf/progs/set_global_vars.c b/tools/testing/selftests/bpf/progs/set_global_vars.c
new file mode 100644
index 000000000000..ebaef28b2cb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/set_global_vars.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+typedef __s32 s32;
+typedef s32 i32;
+typedef __u8 u8;
+
+enum Enum { EA1 = 0, EA2 = 11, EA3 = 10 };
+enum Enumu64 {EB1 = 0llu, EB2 = 12llu };
+enum Enums64 { EC1 = 0ll, EC2 = 13ll };
+
+const volatile __s64 var_s64 = -1;
+const volatile __u64 var_u64 = 0;
+const volatile i32 var_s32 = -1;
+const volatile __u32 var_u32 = 0;
+const volatile __s16 var_s16 = -1;
+const volatile __u16 var_u16 = 0;
+const volatile __s8 var_s8 = -1;
+const volatile u8 var_u8 = 0;
+const volatile enum Enum var_ea = EA1;
+const volatile enum Enumu64 var_eb = EB1;
+const volatile enum Enums64 var_ec = EC1;
+const volatile bool var_b = false;
+const volatile i32 arr[32];
+const volatile enum Enum enum_arr[32];
+const volatile i32 three_d[47][19][17];
+const volatile i32 *ptr_arr[32];
+
+struct Struct {
+ int:16;
+ __u16 filler;
+ struct {
+ const __u16 filler2;
+ };
+ struct Struct2 {
+ __u16 filler;
+ volatile struct {
+ const int:1;
+ union {
+ const volatile u8 var_u8[3];
+ const volatile __s16 filler3;
+ const int:1;
+ s32 mat[7][5];
+ } u;
+ };
+ } struct2[2][4];
+};
+
+const volatile __u32 stru = 0; /* same prefix as below */
+const volatile struct Struct struct1[3];
+const volatile struct Struct struct11[11][7];
+
+struct Struct3 {
+ struct {
+ u8 var_u8_l;
+ };
+ struct {
+ struct {
+ u8 var_u8_h;
+ };
+ };
+};
+
+typedef struct Struct3 Struct3_t;
+
+union Union {
+ __u16 var_u16;
+ Struct3_t struct3;
+};
+
+const volatile union Union union1 = {.var_u16 = -1};
+
+SEC("socket")
+int test_set_globals(void *ctx)
+{
+ volatile __s8 a;
+
+ a = var_s64;
+ a = var_u64;
+ a = var_s32;
+ a = var_u32;
+ a = var_s16;
+ a = var_u16;
+ a = var_s8;
+ a = var_u8;
+ a = var_ea;
+ a = var_eb;
+ a = var_ec;
+ a = var_b;
+ a = struct1[2].struct2[1][2].u.var_u8[2];
+ a = union1.var_u16;
+ a = arr[3];
+ a = arr[EA2];
+ a = enum_arr[EC2];
+ a = three_d[31][7][EA2];
+ a = struct1[2].struct2[1][2].u.mat[5][3];
+ a = struct11[7][5].struct2[0][1].u.mat[3][0];
+
+ return a;
+}
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 6dd4318debbf..d330b1511979 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -61,6 +61,9 @@ static const struct sockopt_test sol_tcp_tests[] = {
{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
{ .opt = TCP_BPF_SOCK_OPS_CB_FLAGS, .new = BPF_SOCK_OPS_ALL_CB_FLAGS,
.expected = BPF_SOCK_OPS_ALL_CB_FLAGS, },
+ { .opt = TCP_BPF_DELACK_MAX, .new = 30000, .expected = 30000, },
+ { .opt = TCP_BPF_RTO_MIN, .new = 30000, .expected = 30000, },
+ { .opt = TCP_RTO_MAX_MS, .new = 2000, .expected = 2000, },
{ .opt = 0, },
};
@@ -80,6 +83,14 @@ struct loop_ctx {
struct sock *sk;
};
+static bool sk_is_tcp(struct sock *sk)
+{
+ return (sk->__sk_common.skc_family == AF_INET ||
+ sk->__sk_common.skc_family == AF_INET6) &&
+ sk->sk_type == SOCK_STREAM &&
+ sk->sk_protocol == IPPROTO_TCP;
+}
+
static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
const struct sockopt_test *t,
int level)
@@ -88,6 +99,9 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
opt = t->opt;
+ if (opt == SO_TXREHASH && !sk_is_tcp(sk))
+ return 0;
+
if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
return 1;
/* kernel initialized txrehash to 255 */
diff --git a/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
new file mode 100644
index 000000000000..09a00d11ffcc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_bypass_prot_mem.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2025 Google LLC */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+extern int tcp_memory_per_cpu_fw_alloc __ksym;
+extern int udp_memory_per_cpu_fw_alloc __ksym;
+
+int nr_cpus;
+bool tcp_activated, udp_activated;
+long tcp_memory_allocated, udp_memory_allocated;
+
+struct sk_prot {
+ long *memory_allocated;
+ int *memory_per_cpu_fw_alloc;
+};
+
+static int drain_memory_per_cpu_fw_alloc(__u32 i, struct sk_prot *sk_prot_ctx)
+{
+ int *memory_per_cpu_fw_alloc;
+
+ memory_per_cpu_fw_alloc = bpf_per_cpu_ptr(sk_prot_ctx->memory_per_cpu_fw_alloc, i);
+ if (memory_per_cpu_fw_alloc)
+ *sk_prot_ctx->memory_allocated += *memory_per_cpu_fw_alloc;
+
+ return 0;
+}
+
+static long get_memory_allocated(struct sock *_sk, int *memory_per_cpu_fw_alloc)
+{
+ struct sock *sk = bpf_core_cast(_sk, struct sock);
+ struct sk_prot sk_prot_ctx;
+ long memory_allocated;
+
+ /* net_aligned_data.{tcp,udp}_memory_allocated was not available. */
+ memory_allocated = sk->__sk_common.skc_prot->memory_allocated->counter;
+
+ sk_prot_ctx.memory_allocated = &memory_allocated;
+ sk_prot_ctx.memory_per_cpu_fw_alloc = memory_per_cpu_fw_alloc;
+
+ bpf_loop(nr_cpus, drain_memory_per_cpu_fw_alloc, &sk_prot_ctx, 0);
+
+ return memory_allocated;
+}
+
+static void fentry_init_sock(struct sock *sk, bool *activated,
+ long *memory_allocated, int *memory_per_cpu_fw_alloc)
+{
+ if (!*activated)
+ return;
+
+ *memory_allocated = get_memory_allocated(sk, memory_per_cpu_fw_alloc);
+ *activated = false;
+}
+
+SEC("fentry/tcp_init_sock")
+int BPF_PROG(fentry_tcp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &tcp_activated,
+ &tcp_memory_allocated, &tcp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("fentry/udp_init_sock")
+int BPF_PROG(fentry_udp_init_sock, struct sock *sk)
+{
+ fentry_init_sock(sk, &udp_activated,
+ &udp_memory_allocated, &udp_memory_per_cpu_fw_alloc);
+ return 0;
+}
+
+SEC("cgroup/sock_create")
+int sock_create(struct bpf_sock *ctx)
+{
+ int err, val = 1;
+
+ err = bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ val = 0;
+
+ err = bpf_getsockopt(ctx, SOL_SOCKET, SK_BPF_BYPASS_PROT_MEM,
+ &val, sizeof(val));
+ if (err)
+ goto err;
+
+ if (val != 1) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ return 1;
+
+err:
+ bpf_set_retval(err);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
index 8386bb15ccdc..84ad515eafd6 100644
--- a/tools/testing/selftests/bpf/progs/sock_addr_kern.c
+++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2024 Google LLC */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
SEC("syscall")
int init_sock(struct init_sock_args *args)
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 96531b0d9d55..77966ded5467 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -17,6 +17,13 @@ static bool ipv6_addr_loopback(const struct in6_addr *a)
a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
}
+static bool ipv4_addr_loopback(__be32 a)
+{
+ return a == bpf_ntohl(0x7f000001);
+}
+
+volatile const unsigned int sf;
+volatile const unsigned int ss;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -26,22 +33,28 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
struct sock *sk = (struct sock *)ctx->sk_common;
struct inet_hashinfo *hinfo;
unsigned int hash;
+ __u64 sock_cookie;
struct net *net;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- sk->sk_state != TCP_LISTEN ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ (ss && sk->sk_state != ss) ||
+ (sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -52,6 +65,28 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
hinfo = net->ipv4.tcp_death_row.hashinfo;
bucket[idx] = hash & hinfo->lhash2_mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
+
+ return 0;
+}
+
+volatile const __u64 destroy_cookie;
+
+SEC("iter/tcp")
+int iter_tcp_destroy(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = (struct sock_common *)ctx->sk_common;
+ __u64 sock_cookie;
+
+ if (!sk_common)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ if (sock_cookie != destroy_cookie)
+ return 0;
+
+ bpf_sock_destroy(sk_common);
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
@@ -63,20 +98,26 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
{
struct sock *sk = (struct sock *)ctx->udp_sk;
struct udp_table *udptable;
+ __u64 sock_cookie;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ (sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr)))
return 0;
if (sk->sk_num == ports[0])
idx = 0;
else if (sk->sk_num == ports[1])
idx = 1;
+ else if (!ports[0] && !ports[1])
+ idx = 0;
else
return 0;
@@ -84,6 +125,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
udptable = sk->sk_net.net->ipv4.udp_table;
bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
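
Both iterators now emit the bucket index followed by the socket cookie through bpf_seq_write(), and the new iter_tcp_destroy program calls bpf_sock_destroy() only on the socket whose cookie equals destroy_cookie. A rough sketch of how userspace might drive such an iterator and read its binary output (hypothetical helper, not the actual test; each record is the 4-byte idx followed by the 8-byte cookie written above):

#include <unistd.h>
#include <bpf/libbpf.h>

static int read_iter_output(struct bpf_program *prog)
{
	struct bpf_link *link;
	char buf[4096];
	int iter_fd, n, ret = -1;

	link = bpf_program__attach_iter(prog, NULL);
	if (!link)
		return -1;

	iter_fd = bpf_iter_create(bpf_link__fd(link));
	if (iter_fd < 0)
		goto out;

	while ((n = read(iter_fd, buf, sizeof(buf))) > 0)
		;	/* parse idx/cookie records from buf here */

	close(iter_fd);
	ret = 0;
out:
	bpf_link__destroy(link);
	return ret;
}
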
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
new file mode 100644
index 000000000000..a96c8150d7f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2018 Facebook
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+extern bool CONFIG_UNWINDER_ORC __kconfig __weak;
+
+/*
+ * This function is here only to reference CONFIG_UNWINDER_ORC
+ * so that the symbol is used and added to the object's BTF.
+ */
+int unused(void)
+{
+ return CONFIG_UNWINDER_ORC ? 0 : 1;
+}
+
+__u32 stack_key;
+
+SEC("kprobe.multi")
+int kprobe_multi_test(struct pt_regs *ctx)
+{
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+SEC("raw_tp/bpf_testmod_test_read")
+int rawtp_test(void *ctx)
+{
+ /* Skip ebpf program entry in the stack. */
+ stack_key = bpf_get_stackid(ctx, &stackmap, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
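
Both programs store the bpf_get_stackid() result in the stack_key global, so userspace can pull the recorded instruction pointers straight out of the BPF_MAP_TYPE_STACK_TRACE map. A small hypothetical fragment of that lookup, assuming a generated stacktrace_ips skeleton (names are illustrative):

	/* userspace side; PERF_MAX_STACK_DEPTH comes from <linux/perf_event.h> */
	__u64 ips[PERF_MAX_STACK_DEPTH] = {};
	__u32 key = skel->bss->stack_key;

	if (!bpf_map_lookup_elem(bpf_map__fd(skel->maps.stackmap), &key, ips))
		/* ips[] now holds up to PERF_MAX_STACK_DEPTH return addresses */;
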
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/stacktrace_map.c
index 47568007b668..0c77df05be7f 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map.c
@@ -50,6 +50,7 @@ struct sched_switch_args {
int next_prio;
};
+__u32 stack_id;
SEC("tracepoint/sched/sched_switch")
int oncpu(struct sched_switch_args *ctx)
{
@@ -64,6 +65,7 @@ int oncpu(struct sched_switch_args *ctx)
/* The size of stackmap and stackid_hmap should be the same */
key = bpf_get_stackid(ctx, &stackmap, 0);
if ((int)key >= 0) {
+ stack_id = key;
bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
stack_p = bpf_map_lookup_elem(&stack_amap, &key);
if (stack_p)
diff --git a/tools/testing/selftests/bpf/progs/stream.c b/tools/testing/selftests/bpf/progs/stream.c
new file mode 100644
index 000000000000..4a5bd852f10c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct arr_elem {
+ struct bpf_res_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct arr_elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1); /* number of pages */
+} arena SEC(".maps");
+
+struct elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+#define ENOSPC 28
+#define _STR "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
+
+int size;
+u64 fault_addr;
+void *arena_ptr;
+
+SEC("syscall")
+__success __retval(0)
+int stream_exhaust(void *ctx)
+{
+ /* Use global variable for loop convergence. */
+ size = 0;
+ bpf_repeat(BPF_MAX_LOOPS) {
+ if (bpf_stream_printk(BPF_STDOUT, _STR) == -ENOSPC && size == 99954)
+ return 0;
+ size += sizeof(_STR) - 1;
+ }
+ return 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__arch_s390x
+__success __retval(0)
+__stderr("ERROR: Timeout detected for may_goto instruction")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_cond_break(void *ctx)
+{
+ while (can_loop)
+ ;
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+__stderr("ERROR: AA or ABBA deadlock detected for bpf_res_spin_lock")
+__stderr("{{Attempted lock = (0x[0-9a-fA-F]+)\n"
+"Total held locks = 1\n"
+"Held lock\\[ 0\\] = \\1}}")
+__stderr("...")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_deadlock(void *ctx)
+{
+ struct bpf_res_spin_lock *lock, *nlock;
+
+ lock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!lock)
+ return 1;
+ nlock = bpf_map_lookup_elem(&arrmap, &(int){0});
+ if (!nlock)
+ return 1;
+ if (bpf_res_spin_lock(lock))
+ return 1;
+ if (bpf_res_spin_lock(nlock)) {
+ bpf_res_spin_unlock(lock);
+ return 0;
+ }
+ bpf_res_spin_unlock(nlock);
+ bpf_res_spin_unlock(lock);
+ return 1;
+}
+
+SEC("syscall")
+__success __retval(0)
+int stream_syscall(void *ctx)
+{
+ bpf_stream_printk(BPF_STDOUT, "foo");
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_write_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+	 * bounds analysis.
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r2 = 1;"
+ "*(u32 *)(r1 + 0x7fff) = r2;"
+ :
+ : "r" (user_vm_start)
+ : "r1", "r2"
+ );
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena READ access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_read_fault(void *ctx)
+{
+ struct bpf_arena *ptr = (void *)&arena;
+ u64 user_vm_start;
+
+ /* Prevent GCC bounds warning: casting &arena to struct bpf_arena *
+ * triggers bounds checking since the map definition is smaller than struct
+ * bpf_arena. barrier_var() makes the pointer opaque to GCC, preventing the
+	 * bounds analysis.
+ */
+ barrier_var(ptr);
+ user_vm_start = ptr->user_vm_start;
+ fault_addr = user_vm_start + 0x7fff;
+ bpf_addr_space_cast(user_vm_start, 0, 1);
+ asm volatile (
+ "r1 = %0;"
+ "r1 = *(u32 *)(r1 + 0x7fff);"
+ :
+ : "r" (user_vm_start)
+ : "r1"
+ );
+ return 0;
+}
+
+static __noinline void subprog(void)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_subprog_fault(void *ctx)
+{
+ subprog();
+ return 0;
+}
+
+static __noinline int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ int __arena *addr = (int __arena *)0xdeadbeef;
+
+ arena_ptr = &arena;
+ *addr = 1;
+ return 0;
+}
+
+SEC("syscall")
+__arch_x86_64
+__arch_arm64
+__success __retval(0)
+__stderr("ERROR: Arena WRITE access at unmapped address 0x{{.*}}")
+__stderr("CPU: {{[0-9]+}} UID: 0 PID: {{[0-9]+}} Comm: {{.*}}")
+__stderr("Call trace:\n"
+"{{([a-zA-Z_][a-zA-Z0-9_]*\\+0x[0-9a-fA-F]+/0x[0-9a-fA-F]+\n"
+"|[ \t]+[^\n]+\n)*}}")
+int stream_arena_callback_fault(void *ctx)
+{
+ struct bpf_timer *arr_timer;
+
+ arr_timer = bpf_map_lookup_elem(&array, &(int){0});
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, 1);
+ bpf_timer_set_callback(arr_timer, timer_cb);
+ bpf_timer_start(arr_timer, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c
new file mode 100644
index 000000000000..3662515f0107
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stream_fail.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+SEC("syscall")
+__failure __msg("Possibly NULL pointer passed")
+int stream_vprintk_null_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("R3 type=scalar expected=")
+int stream_vprintk_scalar_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL);
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("arg#1 doesn't point to a const string")
+int stream_vprintk_string_arg(void *ctx)
+{
+ bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
new file mode 100644
index 000000000000..826e6b6aff7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure1.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char *user_ptr = (char *)1;
+char *invalid_kern_ptr = (char *)-1;
+
+/*
+ * When passing userspace pointers, the error code differs based on arch:
+ * -ERANGE on arches with non-overlapping address spaces
+ * -EFAULT on other arches
+ */
+#if defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_loongarch) || \
+ defined(__TARGET_ARCH_powerpc) || defined(__TARGET_ARCH_x86)
+#define USER_PTR_ERR -ERANGE
+#else
+#define USER_PTR_ERR -EFAULT
+#endif
+
+/*
+ * On s390, __get_kernel_nofault (used in string kfuncs) returns 0 for NULL and
+ * user_ptr (instead of causing an exception) so the below two groups of tests
+ * are not applicable.
+ */
+#ifndef __TARGET_ARCH_s390
+
+/* Passing NULL to string kfuncs (treated as a userspace ptr) */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_null1(void *ctx) { return bpf_strcmp(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcmp_null2(void *ctx) { return bpf_strcmp("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_null1(void *ctx) { return bpf_strcasecmp(NULL, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasecmp_null2(void *ctx) { return bpf_strcasecmp("HELLO", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchr_null(void *ctx) { return bpf_strchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strchrnul_null(void *ctx) { return bpf_strchrnul(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnchr_null(void *ctx) { return bpf_strnchr(NULL, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strrchr_null(void *ctx) { return bpf_strrchr(NULL, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strlen_null(void *ctx) { return bpf_strlen(NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnlen_null(void *ctx) { return bpf_strnlen(NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null1(void *ctx) { return bpf_strspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strspn_null2(void *ctx) { return bpf_strspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null1(void *ctx) { return bpf_strcspn(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcspn_null2(void *ctx) { return bpf_strcspn("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null1(void *ctx) { return bpf_strstr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strstr_null2(void *ctx) { return bpf_strstr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null1(void *ctx) { return bpf_strcasestr(NULL, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strcasestr_null2(void *ctx) { return bpf_strcasestr("hello", NULL); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null1(void *ctx) { return bpf_strnstr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strnstr_null2(void *ctx) { return bpf_strnstr("hello", NULL, 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null1(void *ctx) { return bpf_strncasestr(NULL, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR)int test_strncasestr_null2(void *ctx) { return bpf_strncasestr("hello", NULL, 1); }
+
+/* Passing userspace ptr to string kfuncs */
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr1(void *ctx) { return bpf_strcmp(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcmp_user_ptr2(void *ctx) { return bpf_strcmp("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr1(void *ctx) { return bpf_strcasecmp(user_ptr, "HELLO"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasecmp_user_ptr2(void *ctx) { return bpf_strcasecmp("HELLO", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchr_user_ptr(void *ctx) { return bpf_strchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strchrnul_user_ptr(void *ctx) { return bpf_strchrnul(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnchr_user_ptr(void *ctx) { return bpf_strnchr(user_ptr, 1, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strrchr_user_ptr(void *ctx) { return bpf_strrchr(user_ptr, 'a'); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strlen_user_ptr(void *ctx) { return bpf_strlen(user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnlen_user_ptr(void *ctx) { return bpf_strnlen(user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr1(void *ctx) { return bpf_strspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strspn_user_ptr2(void *ctx) { return bpf_strspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr1(void *ctx) { return bpf_strcspn(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcspn_user_ptr2(void *ctx) { return bpf_strcspn("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr1(void *ctx) { return bpf_strstr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strstr_user_ptr2(void *ctx) { return bpf_strstr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr1(void *ctx) { return bpf_strcasestr(user_ptr, "hello"); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strcasestr_user_ptr2(void *ctx) { return bpf_strcasestr("hello", user_ptr); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr1(void *ctx) { return bpf_strnstr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strnstr_user_ptr2(void *ctx) { return bpf_strnstr("hello", user_ptr, 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr1(void *ctx) { return bpf_strncasestr(user_ptr, "hello", 1); }
+SEC("syscall") __retval(USER_PTR_ERR) int test_strncasestr_user_ptr2(void *ctx) { return bpf_strncasestr("hello", user_ptr, 1); }
+
+#endif /* __TARGET_ARCH_s390 */
+
+/* Passing invalid kernel ptr to string kfuncs should always return -EFAULT */
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault1(void *ctx) { return bpf_strcmp(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcmp_pagefault2(void *ctx) { return bpf_strcmp("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault1(void *ctx) { return bpf_strcasecmp(invalid_kern_ptr, "HELLO"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasecmp_pagefault2(void *ctx) { return bpf_strcasecmp("HELLO", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strchr_pagefault(void *ctx) { return bpf_strchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strchrnul_pagefault(void *ctx) { return bpf_strchrnul(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strnchr_pagefault(void *ctx) { return bpf_strnchr(invalid_kern_ptr, 1, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strrchr_pagefault(void *ctx) { return bpf_strrchr(invalid_kern_ptr, 'a'); }
+SEC("syscall") __retval(-EFAULT) int test_strlen_pagefault(void *ctx) { return bpf_strlen(invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnlen_pagefault(void *ctx) { return bpf_strnlen(invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault1(void *ctx) { return bpf_strspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strspn_pagefault2(void *ctx) { return bpf_strspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault1(void *ctx) { return bpf_strcspn(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcspn_pagefault2(void *ctx) { return bpf_strcspn("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault1(void *ctx) { return bpf_strstr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strstr_pagefault2(void *ctx) { return bpf_strstr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault1(void *ctx) { return bpf_strcasestr(invalid_kern_ptr, "hello"); }
+SEC("syscall") __retval(-EFAULT) int test_strcasestr_pagefault2(void *ctx) { return bpf_strcasestr("hello", invalid_kern_ptr); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault1(void *ctx) { return bpf_strnstr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strnstr_pagefault2(void *ctx) { return bpf_strnstr("hello", invalid_kern_ptr, 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault1(void *ctx) { return bpf_strncasestr(invalid_kern_ptr, "hello", 1); }
+SEC("syscall") __retval(-EFAULT) int test_strncasestr_pagefault2(void *ctx) { return bpf_strncasestr("hello", invalid_kern_ptr, 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
new file mode 100644
index 000000000000..05e1da1f250f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_failure2.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <linux/limits.h>
+
+char long_str[XATTR_SIZE_MAX + 1];
+
+SEC("syscall") int test_strcmp_too_long(void *ctx) { return bpf_strcmp(long_str, long_str); }
+SEC("syscall") int test_strcasecmp_too_long(void *ctx) { return bpf_strcasecmp(long_str, long_str); }
+SEC("syscall") int test_strchr_too_long(void *ctx) { return bpf_strchr(long_str, 'b'); }
+SEC("syscall") int test_strchrnul_too_long(void *ctx) { return bpf_strchrnul(long_str, 'b'); }
+SEC("syscall") int test_strnchr_too_long(void *ctx) { return bpf_strnchr(long_str, sizeof(long_str), 'b'); }
+SEC("syscall") int test_strrchr_too_long(void *ctx) { return bpf_strrchr(long_str, 'b'); }
+SEC("syscall") int test_strlen_too_long(void *ctx) { return bpf_strlen(long_str); }
+SEC("syscall") int test_strnlen_too_long(void *ctx) { return bpf_strnlen(long_str, sizeof(long_str)); }
+SEC("syscall") int test_strspn_str_too_long(void *ctx) { return bpf_strspn(long_str, "a"); }
+SEC("syscall") int test_strspn_accept_too_long(void *ctx) { return bpf_strspn("b", long_str); }
+SEC("syscall") int test_strcspn_str_too_long(void *ctx) { return bpf_strcspn(long_str, "b"); }
+SEC("syscall") int test_strcspn_reject_too_long(void *ctx) { return bpf_strcspn("b", long_str); }
+SEC("syscall") int test_strstr_too_long(void *ctx) { return bpf_strstr(long_str, "hello"); }
+SEC("syscall") int test_strcasestr_too_long(void *ctx) { return bpf_strcasestr(long_str, "hello"); }
+SEC("syscall") int test_strnstr_too_long(void *ctx) { return bpf_strnstr(long_str, "hello", sizeof(long_str)); }
+SEC("syscall") int test_strncasestr_too_long(void *ctx) { return bpf_strncasestr(long_str, "hello", sizeof(long_str)); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/string_kfuncs_success.c b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
new file mode 100644
index 000000000000..a8513964516b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/string_kfuncs_success.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025 Red Hat, Inc.*/
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char str[] = "hello world";
+
+#define __test(retval) SEC("syscall") __success __retval(retval)
+
+/* Functional tests */
+__test(0) int test_strcmp_eq(void *ctx) { return bpf_strcmp(str, "hello world"); }
+__test(1) int test_strcmp_neq(void *ctx) { return bpf_strcmp(str, "hello"); }
+__test(0) int test_strcasecmp_eq1(void *ctx) { return bpf_strcasecmp(str, "hello world"); }
+__test(0) int test_strcasecmp_eq2(void *ctx) { return bpf_strcasecmp(str, "HELLO WORLD"); }
+__test(0) int test_strcasecmp_eq3(void *ctx) { return bpf_strcasecmp(str, "HELLO world"); }
+__test(1) int test_strcasecmp_neq1(void *ctx) { return bpf_strcasecmp(str, "hello"); }
+__test(1) int test_strcasecmp_neq2(void *ctx) { return bpf_strcasecmp(str, "HELLO"); }
+__test(1) int test_strchr_found(void *ctx) { return bpf_strchr(str, 'e'); }
+__test(11) int test_strchr_null(void *ctx) { return bpf_strchr(str, '\0'); }
+__test(-ENOENT) int test_strchr_notfound(void *ctx) { return bpf_strchr(str, 'x'); }
+__test(1) int test_strchrnul_found(void *ctx) { return bpf_strchrnul(str, 'e'); }
+__test(11) int test_strchrnul_notfound(void *ctx) { return bpf_strchrnul(str, 'x'); }
+__test(1) int test_strnchr_found(void *ctx) { return bpf_strnchr(str, 5, 'e'); }
+__test(11) int test_strnchr_null(void *ctx) { return bpf_strnchr(str, 12, '\0'); }
+__test(-ENOENT) int test_strnchr_notfound(void *ctx) { return bpf_strnchr(str, 5, 'w'); }
+__test(9) int test_strrchr_found(void *ctx) { return bpf_strrchr(str, 'l'); }
+__test(11) int test_strrchr_null(void *ctx) { return bpf_strrchr(str, '\0'); }
+__test(-ENOENT) int test_strrchr_notfound(void *ctx) { return bpf_strrchr(str, 'x'); }
+__test(11) int test_strlen(void *ctx) { return bpf_strlen(str); }
+__test(11) int test_strnlen(void *ctx) { return bpf_strnlen(str, 12); }
+__test(5) int test_strspn(void *ctx) { return bpf_strspn(str, "ehlo"); }
+__test(2) int test_strcspn(void *ctx) { return bpf_strcspn(str, "lo"); }
+__test(6) int test_strstr_found(void *ctx) { return bpf_strstr(str, "world"); }
+__test(6) int test_strcasestr_found(void *ctx) { return bpf_strcasestr(str, "woRLD"); }
+__test(-ENOENT) int test_strstr_notfound(void *ctx) { return bpf_strstr(str, "hi"); }
+__test(-ENOENT) int test_strcasestr_notfound(void *ctx) { return bpf_strcasestr(str, "hi"); }
+__test(0) int test_strstr_empty(void *ctx) { return bpf_strstr(str, ""); }
+__test(0) int test_strcasestr_empty(void *ctx) { return bpf_strcasestr(str, ""); }
+__test(0) int test_strnstr_found1(void *ctx) { return bpf_strnstr("", "", 0); }
+__test(0) int test_strnstr_found2(void *ctx) { return bpf_strnstr(str, "hello", 5); }
+__test(0) int test_strnstr_found3(void *ctx) { return bpf_strnstr(str, "hello", 6); }
+__test(-ENOENT) int test_strnstr_notfound1(void *ctx) { return bpf_strnstr(str, "hi", 10); }
+__test(-ENOENT) int test_strnstr_notfound2(void *ctx) { return bpf_strnstr(str, "hello", 4); }
+__test(-ENOENT) int test_strnstr_notfound3(void *ctx) { return bpf_strnstr("", "a", 0); }
+__test(0) int test_strnstr_empty(void *ctx) { return bpf_strnstr(str, "", 1); }
+__test(0) int test_strncasestr_found1(void *ctx) { return bpf_strncasestr("", "", 0); }
+__test(0) int test_strncasestr_found2(void *ctx) { return bpf_strncasestr(str, "heLLO", 5); }
+__test(0) int test_strncasestr_found3(void *ctx) { return bpf_strncasestr(str, "heLLO", 6); }
+__test(-ENOENT) int test_strncasestr_notfound1(void *ctx) { return bpf_strncasestr(str, "hi", 10); }
+__test(-ENOENT) int test_strncasestr_notfound2(void *ctx) { return bpf_strncasestr(str, "hello", 4); }
+__test(-ENOENT) int test_strncasestr_notfound3(void *ctx) { return bpf_strncasestr("", "a", 0); }
+__test(0) int test_strncasestr_empty(void *ctx) { return bpf_strncasestr(str, "", 1); }
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
index 18373a7df76e..f47bf88f8d2a 100644
--- a/tools/testing/selftests/bpf/progs/strncmp_bench.c
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -35,7 +35,10 @@ static __always_inline int local_strncmp(const char *s1, unsigned int sz,
SEC("tp/syscalls/sys_enter_getpgid")
int strncmp_no_helper(void *ctx)
{
- if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+ const char *target_str = target;
+
+ barrier_var(target_str);
+ if (local_strncmp(str, cmp_str_len + 1, target_str) < 0)
__sync_add_and_fetch(&hits, 1);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index a5c74d31a244..6e1918deaf26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -330,9 +330,9 @@ static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
}
bpf_probe_read_user(&tls_ptr, sizeof(void *), dtv);
/* if pointer has (void *)-1 value, then TLS wasn't initialized yet */
- return tls_ptr && tls_ptr != (void *)-1
- ? tls_ptr + tls_index.offset
- : NULL;
+ if (!tls_ptr || tls_ptr == (void *)-1)
+ return NULL;
+ return tls_ptr + tls_index.offset;
}
#ifdef SUBPROGS
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
index d7fdcabe7d90..284a5b008e0c 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_detach.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
index 3c822103bd40..d8cc99f5c2e2 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
new file mode 100644
index 000000000000..ad8bb546c9bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP1_MAGIC 1234
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP1_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP1_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
new file mode 100644
index 000000000000..cea1a2f4b62f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "../test_kmods/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id)
+int st_ops_id;
+
+int test_pid;
+int test_err;
+
+#define MAP2_MAGIC 4567
+
+SEC("struct_ops")
+int BPF_PROG(test_1, struct st_ops_args *args)
+{
+ return MAP2_MAGIC;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(sys_enter, struct pt_regs *regs, long id)
+{
+ struct st_ops_args args = {};
+ struct task_struct *task;
+ int ret;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC("syscall")
+int syscall_prog(void *ctx)
+{
+ struct st_ops_args args = {};
+ int ret;
+
+ ret = bpf_kfunc_multi_st_ops_test_1(&args);
+ if (ret != MAP2_MAGIC)
+ test_err++;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_multi_st_ops st_ops_map = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
new file mode 100644
index 000000000000..2b98b7710816
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * allow a referenced kptr or a NULL pointer to be returned. A referenced kptr to the task
+ * here is acquired automatically as the task argument is tagged with "__ref".
+ */
+SEC("struct_ops/test_return_ref_kptr")
+struct task_struct *BPF_PROG(kptr_return, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ if (dummy % 2) {
+ bpf_task_release(task);
+ return NULL;
+ }
+ return task;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
new file mode 100644
index 000000000000..caeea158ef69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__invalid_scalar.c
@@ -0,0 +1,26 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a non-zero scalar value.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 has smin=1 smax=1 should have been in [0, 0]")
+struct task_struct *BPF_PROG(kptr_return_fail__invalid_scalar, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ bpf_task_release(task);
+ return (struct task_struct *)1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__invalid_scalar,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
new file mode 100644
index 000000000000..b8b4f05c3d7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__local_kptr.c
@@ -0,0 +1,34 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a local kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__local_kptr, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *t;
+
+ bpf_task_release(task);
+
+ t = bpf_obj_new(typeof(*task));
+ if (!t)
+ return NULL;
+
+ return t;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__local_kptr,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
new file mode 100644
index 000000000000..7ddeb28c2329
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__nonzero_offset.c
@@ -0,0 +1,25 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a modified referenced kptr.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("dereference of modified trusted_ptr_ ptr R0 off={{[0-9]+}} disallowed")
+struct task_struct *BPF_PROG(kptr_return_fail__nonzero_offset, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ return (struct task_struct *)&task->jobctl;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__nonzero_offset,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
new file mode 100644
index 000000000000..6a2dd5367802
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_kptr_return_fail__wrong_type.c
@@ -0,0 +1,30 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This tests struct_ops BPF programs returning a referenced kptr. The verifier should
+ * reject programs returning a referenced kptr of the wrong type.
+ */
+SEC("struct_ops/test_return_ref_kptr")
+__failure __msg("At program exit the register R0 is not a known value (ptr_or_null_)")
+struct task_struct *BPF_PROG(kptr_return_fail__wrong_type, int dummy,
+ struct task_struct *task, struct cgroup *cgrp)
+{
+ struct task_struct *ret;
+
+ ret = (struct task_struct *)bpf_cgroup_acquire(cgrp);
+ bpf_task_release(task);
+
+ return ret;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_kptr_return = {
+ .test_return_ref_kptr = (void *)kptr_return_fail__wrong_type,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
index b450f72e744a..ccab3935aa42 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
index 6283099ec383..8b5515f4f724 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
index 4c56d4a9d9f4..71c420c3a5a6 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -3,7 +3,7 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
index 9efcc6e4d356..5b23ea817f1f 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -3,7 +3,7 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
index fa2021388485..5d0937fa07be 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
@@ -2,7 +2,7 @@
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
index 8ea57e5348ab..dbe646013811 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack.c
@@ -3,11 +3,11 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
index 1f55ec4cee37..3d89ad7cbe2a 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_fail.c
@@ -3,11 +3,11 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
index f2f300d50988..b1f6d7e5a8e5 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_private_stack_recur.c
@@ -3,11 +3,11 @@
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
bool skip __attribute((__section__(".data"))) = false;
#else
bool skip = true;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
new file mode 100644
index 000000000000..9c0a65466356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted.c
@@ -0,0 +1,31 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__attribute__((nomerge)) extern void bpf_task_release(struct task_struct *p) __ksym;
+
+/* This is a test BPF program that uses struct_ops to access a referenced
+ * kptr argument. This is a test for the verifier to ensure that
+ * 1) it recognizes the task as a referenced object (i.e., ref_obj_id > 0), and
+ * 2) the same reference can be acquired from multiple paths as long as it
+ * has not been released.
+ */
+SEC("struct_ops/test_refcounted")
+int BPF_PROG(refcounted, int dummy, struct task_struct *task)
+{
+ if (dummy == 1)
+ bpf_task_release(task);
+ else
+ bpf_task_release(task);
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_refcounted = {
+ .test_refcounted = (void *)refcounted,
+};
+
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
new file mode 100644
index 000000000000..ae074aa62852
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__global_subprog.c
@@ -0,0 +1,39 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern void bpf_task_release(struct task_struct *p) __ksym;
+
+__noinline int subprog_release(__u64 *ctx __arg_ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+ int dummy = (int)ctx[0];
+
+ bpf_task_release(task);
+
+ return dummy + 1;
+}
+
+/* Test that the verifier rejects a program that contains a global
+ * subprogram with referenced kptr arguments
+ */
+SEC("struct_ops/test_refcounted")
+__failure __log_level(2)
+__msg("Validating subprog_release() func#1...")
+__msg("invalid bpf_context access off=8. Reference may already be released")
+int refcounted_fail__global_subprog(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+
+ return subprog_release(ctx);
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__global_subprog,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
new file mode 100644
index 000000000000..e945b1a04294
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__ref_leak.c
@@ -0,0 +1,22 @@
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Test that the verifier rejects a program that acquires a referenced
+ * kptr through context without releasing the reference
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("Unreleased reference id=1 alloc_insn=0")
+int BPF_PROG(refcounted_fail__ref_leak, int dummy,
+ struct task_struct *task)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__ref_leak,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
new file mode 100644
index 000000000000..3b125025a1f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_refcounted_fail__tail_call.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../test_kmods/bpf_testmod.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+/* Test that the verifier rejects a program with referenced kptr arguments
+ * that tail call
+ */
+SEC("struct_ops/test_refcounted")
+__failure __msg("program with __ref argument cannot tail call")
+int refcounted_fail__tail_call(unsigned long long *ctx)
+{
+ struct task_struct *task = (struct task_struct *)ctx[1];
+
+ bpf_task_release(task);
+ bpf_tail_call(ctx, &prog_array, 0);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_ref_acquire = {
+ .test_refcounted = (void *)refcounted_fail__tail_call,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/summarization.c b/tools/testing/selftests/bpf/progs/summarization.c
new file mode 100644
index 000000000000..f89effe82c9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+__noinline __weak
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?tc")
+int main_changes_with_subprogs(struct __sk_buff *sk)
+{
+ changes_pkt_data(sk);
+ does_not_change_pkt_data(sk);
+ return 0;
+}
+
+SEC("?tc")
+int main_changes(struct __sk_buff *sk)
+{
+ bpf_skb_pull_data(sk, 0);
+ return 0;
+}
+
+SEC("?tc")
+int main_does_not_change(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+__noinline
+long might_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+__noinline __weak
+long does_not_sleep(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep_with_subprogs(struct pt_regs *ctx)
+{
+ might_sleep(ctx);
+ does_not_sleep(ctx);
+ return 0;
+}
+
+SEC("?uprobe.s")
+int main_might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?uprobe.s")
+int main_does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/summarization_freplace.c b/tools/testing/selftests/bpf/progs/summarization_freplace.c
new file mode 100644
index 000000000000..935f00e0e9ea
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/summarization_freplace.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("?freplace")
+long changes_pkt_data(struct __sk_buff *sk)
+{
+ return bpf_skb_pull_data(sk, 0);
+}
+
+SEC("?freplace")
+long does_not_change_pkt_data(struct __sk_buff *sk)
+{
+ return 0;
+}
+
+SEC("?freplace")
+long might_sleep(struct pt_regs *ctx)
+{
+ int i;
+
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+SEC("?freplace")
+long does_not_sleep(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
index 0f4dfb770c32..b698cc62a371 100644
--- a/tools/testing/selftests/bpf/progs/syscall.c
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -76,9 +76,9 @@ static int btf_load(void)
.magic = BTF_MAGIC,
.version = BTF_VERSION,
.hdr_len = sizeof(struct btf_header),
- .type_len = sizeof(__u32) * 8,
- .str_off = sizeof(__u32) * 8,
- .str_len = sizeof(__u32),
+ .type_len = sizeof(raw_btf.types),
+ .str_off = offsetof(struct btf_blob, str) - offsetof(struct btf_blob, types),
+ .str_len = sizeof(raw_btf.str),
},
.types = {
/* long */
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
index 327ca395e860..d556b19413d7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb)
{
int ret = 1;
+ clobber_regs_stack();
+
count++;
subprog_tail(skb);
subprog_tail(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
index 72fd0d577506..ae94c9c70ab7 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
int classifier_1(struct __sk_buff *skb);
@@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
subprog_tail0(skb);
subprog_tail1(skb);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
index a7fb91cb05b7..56b6b0099840 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c
@@ -2,6 +2,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_test_utils.h"
int classifier_0(struct __sk_buff *skb);
@@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb)
{
int ret = 0;
+ clobber_regs_stack();
+
bpf_tail_call_static(skb, &jmp_table0, 0);
__sink(ret);
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
index c87f9ca982d3..5261395713cd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c
@@ -4,6 +4,7 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_test_utils.h"
struct {
__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
@@ -24,6 +25,8 @@ int subprog_tail(void *ctx)
SEC("fentry/dummy")
int BPF_PROG(fentry, struct sk_buff *skb)
{
+ clobber_regs_stack();
+
count++;
subprog_tail(ctx);
subprog_tail(ctx);
diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
new file mode 100644
index 000000000000..432fff2af844
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h
@@ -0,0 +1,237 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_DATA_BPF_H
+#define __TASK_LOCAL_DATA_BPF_H
+
+/*
+ * Task local data is a library that facilitates sharing per-task data
+ * between user space and bpf programs.
+ *
+ *
+ * USAGE
+ *
+ * A TLD, an entry of data in task local data, first needs to be created by the
+ * user space. This is done by calling the user space API, TLD_DEFINE_KEY() or
+ * tld_create_key(), with the name of the TLD and the size.
+ *
+ * TLD_DEFINE_KEY(prio, "priority", sizeof(int));
+ *
+ * or
+ *
+ * void func_call(...) {
+ * tld_key_t prio, in_cs;
+ *
+ * prio = tld_create_key("priority", sizeof(int));
+ * in_cs = tld_create_key("in_critical_section", sizeof(bool));
+ * ...
+ *
+ * A key associated with the TLD, which has an opaque type tld_key_t, will be
+ * initialized or returned. It can be used to get a pointer to the TLD in the
+ * user space by calling tld_get_data().
+ *
+ * In a bpf program, tld_object_init() first needs to be called to initialize a
+ * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data().
+ * The API will try to fetch the key by the name and use it to locate the data.
+ * A pointer to the TLD will be returned. It also caches the key in a task local
+ * storage map, tld_key_map, whose value type, struct tld_keys, must be defined
+ * by the developer.
+ *
+ * struct tld_keys {
+ * tld_key_t prio;
+ * tld_key_t in_cs;
+ * };
+ *
+ * SEC("struct_ops")
+ * void prog(struct task_struct task, ...)
+ * {
+ * struct tld_object tld_obj;
+ * int err, *p;
+ *
+ * err = tld_object_init(task, &tld_obj);
+ * if (err)
+ * return;
+ *
+ * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int));
+ * if (p)
+ * // do something depending on *p
+ */
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1))
+#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1)
+
+#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1)
+
+#ifndef TLD_NAME_LEN
+#define TLD_NAME_LEN 62
+#endif
+
+#ifndef TLD_KEY_MAP_CREATE_RETRY
+#define TLD_KEY_MAP_CREATE_RETRY 10
+#endif
+
+typedef struct {
+ __s16 off;
+} tld_key_t;
+
+struct tld_metadata {
+ char name[TLD_NAME_LEN];
+ __u16 size;
+};
+
+struct tld_meta_u {
+ __u8 cnt;
+ __u16 size;
+ struct tld_metadata metadata[TLD_MAX_DATA_CNT];
+};
+
+struct tld_data_u {
+ __u64 start; /* offset of tld_data_u->data in a page */
+ char data[__PAGE_SIZE - sizeof(__u64)];
+};
+
+struct tld_map_value {
+ struct tld_data_u __uptr *data;
+ struct tld_meta_u __uptr *meta;
+};
+
+typedef struct tld_uptr_dummy {
+ struct tld_data_u data[0];
+ struct tld_meta_u meta[0];
+} *tld_uptr_dummy_t;
+
+struct tld_object {
+ struct tld_map_value *data_map;
+ struct tld_keys *key_map;
+ /*
+ * Force the compiler to generate the actual definition of tld_meta_u
+ * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will
+ * be BTF_KIND_FWD.
+ */
+ tld_uptr_dummy_t dummy[0];
+};
+
+/*
+ * Map value of tld_key_map for caching keys. Must be defined by the developer.
+ * Members should be of type tld_key_t and are passed as the second argument of tld_get_data().
+ */
+struct tld_keys;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_map_value);
+} tld_data_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct tld_keys);
+} tld_key_map SEC(".maps");
+
+/**
+ * tld_object_init() - Initialize a tld_object.
+ *
+ * @task: The task_struct of the target task
+ * @tld_obj: A pointer to a tld_object to be initialized
+ *
+ * Return 0 on success; -ENODATA if the user space did not initialize task local data
+ * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map
+ * fails
+ */
+__attribute__((unused))
+static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj)
+{
+ int i;
+
+ tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0);
+ if (!tld_obj->data_map)
+ return -ENODATA;
+
+ bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) {
+ tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (likely(tld_obj->key_map))
+ break;
+ }
+ if (!tld_obj->key_map)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/*
+ * Return the offset of TLD if @name is found. Otherwise, return the current TLD count
+ * using the nonpositive range so that the next tld_get_data() can skip fetching key if
+ * no new TLD is added or start comparing name from the first newly added TLD.
+ */
+__attribute__((unused))
+static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start)
+{
+ struct tld_metadata *metadata;
+ int i, cnt, start, off = 0;
+
+ if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta)
+ return 0;
+
+ start = tld_obj->data_map->data->start;
+ cnt = tld_obj->data_map->meta->cnt;
+ metadata = tld_obj->data_map->meta->metadata;
+
+ bpf_for(i, 0, cnt) {
+ if (i >= TLD_MAX_DATA_CNT)
+ break;
+
+ if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name))
+ return start + off;
+
+ off += TLD_ROUND_UP(metadata[i].size, 8);
+ }
+
+ return -cnt;
+}
+
+/**
+ * tld_get_data() - Retrieve a pointer to the TLD associated with the name.
+ *
+ * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init()
+ * @key: The cached key of the TLD in tld_key_map
+ * @name: The name of the key associated with a TLD
+ * @size: The size of the TLD. Must be a known constant value
+ *
+ * Return a pointer to the TLD associated with @name; NULL if not found or @size is too
+ * big. @key is used to cache the key if the TLD is found to speed up subsequent calls.
+ * It should be defined as a member of tld_keys of type tld_key_t by the developer.
+ */
+#define tld_get_data(tld_obj, key, name, size) \
+ ({ \
+ void *data = NULL, *_data = (tld_obj)->data_map->data; \
+ long off = (tld_obj)->key_map->key.off; \
+ int cnt; \
+ \
+ if (likely(_data)) { \
+ if (likely(off > 0)) { \
+ barrier_var(off); \
+ if (likely(off < __PAGE_SIZE - size)) \
+ data = _data + off; \
+ } else { \
+ cnt = -(off); \
+ if (likely((tld_obj)->data_map->meta) && \
+ cnt < (tld_obj)->data_map->meta->cnt) { \
+ off = __tld_fetch_key(tld_obj, name, cnt); \
+ (tld_obj)->key_map->key.off = off; \
+ \
+ if (likely(off < __PAGE_SIZE - size)) { \
+ barrier_var(off); \
+ if (off > 0) \
+ data = _data + off; \
+ } \
+ } \
+ } \
+ } \
+ data; \
+ })
+
+#endif
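
The following is a minimal, hedged sketch of a BPF-side consumer of this header; it is not part of the patch. It assumes user space has already created a TLD named "priority" with TLD_DEFINE_KEY() or tld_create_key(), and that the selftests build provides __PAGE_SIZE, bpf_for() and the usual libbpf helpers. The section, program name and the "priority" key are illustrative, while struct tld_keys, tld_object_init() and tld_get_data() come from the header above.

/* Hedged example with assumed names: read a hypothetical "priority" TLD. */
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "task_local_data.bpf.h"

/* Required by the header: one tld_key_t member per TLD used by the program. */
struct tld_keys {
	tld_key_t prio;
};

SEC("tp_btf/sched_switch")
int BPF_PROG(on_switch, bool preempt, struct task_struct *prev, struct task_struct *next)
{
	struct tld_object tld_obj;
	int *p;

	/* Returns -ENODATA until user space has set up task local data for this task. */
	if (tld_object_init(next, &tld_obj))
		return 0;

	/* First call resolves "priority" by name; later calls reuse the cached key. */
	p = tld_get_data(&tld_obj, prio, "priority", sizeof(int));
	if (p)
		bpf_printk("pid %d priority %d", next->pid, *p);

	return 0;
}

char _license[] SEC("license") = "GPL";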
diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
index ea2dbb80f7b3..986829aaf73a 100644
--- a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@@ -10,7 +10,7 @@ char _license[] SEC("license") = "GPL";
#define EBUSY 16
#endif
-extern bool CONFIG_PREEMPT __kconfig __weak;
+extern bool CONFIG_PREEMPTION __kconfig __weak;
int nr_get_errs = 0;
int nr_del_errs = 0;
@@ -29,7 +29,7 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int ret, zero = 0;
int *value;
- if (!CONFIG_PREEMPT)
+ if (!CONFIG_PREEMPTION)
return 0;
task = bpf_get_current_task_btf();
diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c
new file mode 100644
index 000000000000..663a80990f8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} lrumap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+int oncpu_hash_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ err = bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_array_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+int oncpu_lru_map(struct pt_regs *args)
+{
+ struct elem empty_work = { .data = { 0 } };
+ struct elem *work;
+ struct task_struct *task;
+ int err;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (work)
+ return 0;
+ err = bpf_map_update_elem(&lrumap, &key, &empty_work, BPF_NOEXIST);
+ if (err)
+ return 0;
+ work = bpf_map_lookup_elem(&lrumap, &key);
+ if (!work || work->data[0])
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c
new file mode 100644
index 000000000000..1270953fd092
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_fail.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+const void *user_ptr = NULL;
+
+struct elem {
+ char data[128];
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} arrmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ struct elem *work = value;
+
+ bpf_copy_from_user_str(work->data, sizeof(work->data), (const void *)user_ptr, 0);
+ return 0;
+}
+
+int key = 0;
+
+SEC("perf_event")
+__failure __msg("doesn't match map pointer in R3")
+int mismatch_map(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("arg#1 doesn't point to a map value")
+int no_map_task_work(struct pt_regs *args)
+{
+ struct task_struct *task;
+ struct bpf_task_work tw;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg1")
+int task_work_null(struct pt_regs *args)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL);
+ return 0;
+}
+
+SEC("perf_event")
+__failure __msg("Possibly NULL pointer passed to trusted arg2")
+int map_null(struct pt_regs *args)
+{
+ struct elem *work;
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ work = bpf_map_lookup_elem(&arrmap, &key);
+ if (!work)
+ return 0;
+ bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c
new file mode 100644
index 000000000000..55e555f7f41b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_work_stress.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <string.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+#define ENTRIES 128
+
+char _license[] SEC("license") = "GPL";
+
+__u64 callback_scheduled = 0;
+__u64 callback_success = 0;
+__u64 schedule_error = 0;
+__u64 delete_success = 0;
+
+struct elem {
+ __u32 count;
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, ENTRIES);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int process_work(struct bpf_map *map, void *key, void *value)
+{
+ __sync_fetch_and_add(&callback_success, 1);
+ return 0;
+}
+
+SEC("syscall")
+int schedule_task_work(void *ctx)
+{
+ struct elem empty_work = {.count = 0};
+ struct elem *work;
+ int key = 0, err;
+
+ key = bpf_ktime_get_ns() % ENTRIES;
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work) {
+ bpf_map_update_elem(&hmap, &key, &empty_work, BPF_NOEXIST);
+ work = bpf_map_lookup_elem(&hmap, &key);
+ if (!work)
+ return 0;
+ }
+ err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap,
+ process_work, NULL);
+ if (err)
+ __sync_fetch_and_add(&schedule_error, 1);
+ else
+ __sync_fetch_and_add(&callback_scheduled, 1);
+ return 0;
+}
+
+SEC("syscall")
+int delete_task_work(void *ctx)
+{
+ int key = 0, err;
+
+ key = bpf_get_prandom_u32() % ENTRIES;
+ err = bpf_map_delete_elem(&hmap, &key);
+ if (!err)
+ __sync_fetch_and_add(&delete_success, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
index d1a57f7d09bd..fe6249d99b31 100644
--- a/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/tc_bpf2bpf.c
@@ -11,6 +11,8 @@ int subprog_tc(struct __sk_buff *skb)
__sink(skb);
__sink(ret);
+ /* let verifier know that 'subprog_tc' can change pointers to skb->data */
+ bpf_skb_change_proto(skb, 0, 0);
return ret;
}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
index a58b5194fc89..022291f21dfb 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -8,8 +8,6 @@ char _license[] SEC("license") = "GPL";
#define USEC_PER_SEC 1000000UL
-#define min(a, b) ((a) < (b) ? (a) : (b))
-
static unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_ext.c b/tools/testing/selftests/bpf/progs/test_btf_ext.c
new file mode 100644
index 000000000000..cdf20331db04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_ext.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2025 Meta Platforms Inc. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__noinline static void f0(void)
+{
+ __u64 a = 1;
+
+ __sink(a);
+}
+
+SEC("xdp")
+__u64 global_func(struct xdp_md *xdp)
+{
+ f0();
+ return XDP_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
index 44628865fe1d..4fee0fdc7607 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -51,13 +51,13 @@ out:
}
SEC("lsm/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
return bpf_link_create_verify(cmd);
}
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index 2ec1de11a3ae..7b6b2b342c1d 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -7,6 +7,7 @@
#include <stddef.h>
#include <stdint.h>
+#include <errno.h>
char _license[] SEC("license") = "GPL";
@@ -288,3 +289,14 @@ int tc_input_len_exceed(struct __sk_buff *ctx)
global_bpf_mtu_xdp = mtu_len;
return retval;
}
+
+SEC("tc")
+int tc_chk_segs_flag(struct __sk_buff *ctx)
+{
+ __u32 mtu_len = 0;
+ int err;
+
+ err = bpf_check_mtu(ctx, GLOBAL_USER_IFINDEX, &mtu_len, 0, BPF_MTU_CHK_SEGS);
+
+ return err == -EINVAL ? BPF_OK : BPF_DROP;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 683c8aaa63da..26a53e54b8fa 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -15,13 +15,14 @@
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#include "bpf_misc.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -31,9 +32,6 @@
#define INLINING __always_inline
#endif
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
-
#define IP_OFFSET_MASK (0x1FFF)
#define IP_MF (0x2000)
@@ -129,7 +127,7 @@ typedef uint8_t *net_ptr __attribute__((align_value(8)));
typedef struct buf {
struct __sk_buff *skb;
net_ptr head;
- /* NB: tail musn't have alignment other than 1, otherwise
+ /* NB: tail mustn't have alignment other than 1, otherwise
* LLVM will go and eliminate code, e.g. when checking packet lengths.
*/
uint8_t *const tail;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
index 233b089d1fba..eb55cb8a3dbd 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -10,7 +10,7 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
* friendly version breaks compilation for older clang versions <= 15
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
index 464515b824b9..dfd4a2710391 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -15,7 +15,7 @@
#include <linux/ipv6.h>
#include <linux/pkt_cls.h>
#include <linux/tcp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -494,7 +494,7 @@ static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_header
*offset += sizeof(*next_hop);
- /* Skip the remainig next hops (may be zero). */
+ /* Skip the remaining next hops (may be zero). */
return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
index 51b3f79df523..448403634eea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c
@@ -15,6 +15,7 @@ struct {
struct core_reloc_arrays_output {
int a2;
+ int a3;
char b123;
int c1c;
int d00d;
@@ -41,6 +42,7 @@ int test_core_arrays(void *ctx)
{
struct core_reloc_arrays *in = (void *)&data.in;
struct core_reloc_arrays_output *out = (void *)&data.out;
+ int *a;
if (CORE_READ(&out->a2, &in->a[2]))
return 1;
@@ -53,6 +55,9 @@ int test_core_arrays(void *ctx)
if (CORE_READ(&out->f01c, &in->f[0][1].c))
return 1;
+ a = __builtin_preserve_access_index(({ in->a; }));
+ out->a3 = a[0] + a[1] + a[2] + a[3];
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
index 6afa834756e9..fac33a14f200 100644
--- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c
+++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
@@ -6,13 +6,20 @@
#include <stdbool.h>
extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+extern bool CONFIG_PPC_FTRACE_OUT_OF_LINE __kconfig __weak;
+extern bool CONFIG_KPROBES_ON_FTRACE __kconfig __weak;
+extern bool CONFIG_PPC64 __kconfig __weak;
-/* This function is here to have CONFIG_X86_KERNEL_IBT
- * used and added to object BTF.
+/* This function is here to have CONFIG_X86_KERNEL_IBT,
+ * CONFIG_PPC_FTRACE_OUT_OF_LINE, CONFIG_KPROBES_ON_FTRACE,
+ * CONFIG_PPC64 used and added to object BTF.
*/
int unused(void)
{
- return CONFIG_X86_KERNEL_IBT ? 0 : 1;
+ return CONFIG_X86_KERNEL_IBT ||
+ CONFIG_PPC_FTRACE_OUT_OF_LINE ||
+ CONFIG_KPROBES_ON_FTRACE ||
+ CONFIG_PPC64 ? 0 : 1;
}
SEC("kprobe")
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
index 66e737720f7c..54305f4c9f2d 100644
--- a/tools/testing/selftests/bpf/progs/test_get_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -6,6 +6,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -17,12 +18,23 @@ static const char expected_value[] = "hello";
char value1[32];
char value2[32];
+/* Matches caller of test_get_xattr() in prog_tests/fs_kfuncs.c */
+static const char xattr_names[][64] = {
+ /* The following work. */
+ "user.kfuncs",
+ "security.bpf.xxx",
+
+ /* The following do not work. */
+ "security.bpf",
+ "security.selinux"
+};
+
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -30,7 +42,11 @@ int BPF_PROG(test_file_open, struct file *f)
bpf_dynptr_from_mem(value1, sizeof(value1), 0, &value_ptr);
- ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_file_xattr(f, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value1, ret, expected_value))
@@ -44,7 +60,7 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
{
struct bpf_dynptr value_ptr;
__u32 pid;
- int ret;
+ int ret, i;
pid = bpf_get_current_pid_tgid() >> 32;
if (pid != monitored_pid)
@@ -52,7 +68,11 @@ int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
bpf_dynptr_from_mem(value2, sizeof(value2), 0, &value_ptr);
- ret = bpf_get_dentry_xattr(dentry, "user.kfuncs", &value_ptr);
+ for (i = 0; i < ARRAY_SIZE(xattr_names); i++) {
+ ret = bpf_get_dentry_xattr(dentry, xattr_names[i], &value_ptr);
+ if (ret == sizeof(expected_value))
+ break;
+ }
if (ret != sizeof(expected_value))
return 0;
if (bpf_strncmp(value2, ret, expected_value))
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 5da001ca57a5..09d027bd3ea8 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -26,7 +26,7 @@ __noinline int foo(const struct Big *big)
}
SEC("cgroup_skb/ingress")
-__failure __msg("invalid indirect access to stack")
+__failure __msg("invalid read from stack")
int global_func10(struct __sk_buff *skb)
{
const struct Small small = {.x = skb->len };
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
index a3f220ba7025..ee65bad0436d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_map_resize.c
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -32,6 +32,16 @@ int my_int_last SEC(".data.array_not_last");
int percpu_arr[1] SEC(".data.percpu_arr");
+/* At least one extern is included to ensure that a specific regression
+ * is covered: resizing used to result in a use-after-free bug once type
+ * information was invalidated by the resize operation.
+ *
+ * There isn't a particularly good API to test for this specific condition,
+ * but having externs in the resizing tests exercises this path.
+ */
+extern int LINUX_KERNEL_VERSION __kconfig;
+long version_sink;
+
SEC("tp/syscalls/sys_enter_getpid")
int bss_array_sum(void *ctx)
{
@@ -44,6 +54,9 @@ int bss_array_sum(void *ctx)
for (size_t i = 0; i < bss_array_len; ++i)
sum += array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
@@ -59,6 +72,9 @@ int data_array_sum(void *ctx)
for (size_t i = 0; i < data_array_len; ++i)
sum += my_array[i];
+ /* see above; ensure this is not optimized out */
+ version_sink = LINUX_KERNEL_VERSION;
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_kernel_flag.c b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
new file mode 100644
index 000000000000..b45fab3be352
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kernel_flag.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2025 Microsoft Corporation
+ *
+ * Author: Blaise Boscaccy <bboscaccy@linux.microsoft.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_tid;
+
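+/* Deny (-EINVAL) any BPF syscall issued on behalf of the kernel while the
+ * calling thread is the monitored one; all other callers are allowed.
+ */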
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
+{
+ __u32 tid;
+
+ tid = bpf_get_current_pid_tgid() & 0xFFFFFFFF;
+ if (!kernel || tid != monitored_tid)
+ return 0;
+ else
+ return -EINVAL;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index e68667aec6a6..061befb004c2 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -36,7 +36,7 @@ char _license[] SEC("license") = "GPL";
SEC("?lsm.s/bpf")
__failure __msg("cannot pass in dynptr at an offset=-8")
-int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val;
@@ -45,8 +45,8 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("?lsm.s/bpf")
-__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
-int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
+__failure __msg("arg#0 expected pointer to stack or const struct bpf_dynptr")
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
unsigned long val = 0;
@@ -55,7 +55,7 @@ int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("lsm.s/bpf")
-int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *trusted_keyring;
struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
index 7ac7e1de34d8..0ad1bf1ede8d 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
#include "bpf_kfuncs.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
SEC("tc")
int kfunc_dynptr_nullable_test1(struct __sk_buff *skb)
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
index c73776990ae3..1f7e1e59b073 100644
--- a/tools/testing/selftests/bpf/progs/test_lookup_key.c
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -14,16 +14,16 @@
char _license[] SEC("license") = "GPL";
__u32 monitored_pid;
-__u32 key_serial;
+__s32 key_serial;
__u32 key_id;
__u64 flags;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
extern void bpf_key_put(struct bpf_key *key) __ksym;
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_key *bkey;
__u32 pid;
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index cc1a012d038f..03d7f89787a1 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -5,7 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
__u32 raw_tp_read_sz = 0;
@@ -19,7 +19,7 @@ int BPF_PROG(handle_raw_tp,
__u32 raw_tp_bare_write_sz = 0;
-SEC("raw_tp/bpf_testmod_test_write_bare")
+SEC("raw_tp/bpf_testmod_test_write_bare_tp")
int BPF_PROG(handle_raw_tp_bare,
struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx)
{
@@ -31,7 +31,7 @@ int raw_tp_writable_bare_in_val = 0;
int raw_tp_writable_bare_early_ret = 0;
int raw_tp_writable_bare_out_val = 0;
-SEC("raw_tp.w/bpf_testmod_test_writable_bare")
+SEC("raw_tp.w/bpf_testmod_test_writable_bare_tp")
int BPF_PROG(handle_raw_tp_writable_bare,
struct bpf_testmod_test_writable_ctx *writable)
{
@@ -117,7 +117,7 @@ int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
bpf_probe_read_kernel(&buf, 8, ret);
bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
- *(volatile long long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index abb7344b531f..5edf3cdc213d 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -1,9 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
-#include <stdbool.h>
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/ptrace.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/test_perf_branches.c b/tools/testing/selftests/bpf/progs/test_perf_branches.c
index a1ccc831c882..05ac9410cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_branches.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_branches.c
@@ -8,6 +8,7 @@
#include <bpf/bpf_tracing.h>
int valid = 0;
+int run_cnt = 0;
int required_size_out = 0;
int written_stack_out = 0;
int written_global_out = 0;
@@ -24,6 +25,8 @@ int perf_branches(void *ctx)
__u64 entries[4 * 3] = {0};
int required_size, written_stack, written_global;
+ ++run_cnt;
+
/* write to stack */
written_stack = bpf_read_branch_records(ctx, entries, sizeof(entries), 0);
/* ignore spurious events */
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
new file mode 100644
index 000000000000..c855f8f87eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
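+/* Two devmaps pinned by name, differing only in max_entries. */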
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+} pinmap2 SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_htab.c b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
new file mode 100644
index 000000000000..ae227930c73c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pinning_htab.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct timer_val {
+ struct bpf_timer timer;
+};
+
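+/* Two hash maps holding a bpf_timer value: one preallocated (default),
+ * one created with BPF_F_NO_PREALLOC.
+ */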
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+} timer_prealloc SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, struct timer_val);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} timer_no_prealloc SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
index 2fdc44e76624..89b0cd5a3e06 100644
--- a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -7,7 +7,7 @@
char tp_name[128];
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
switch (cmd) {
case BPF_RAW_TRACEPOINT_OPEN:
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
new file mode 100644
index 000000000000..ff4aa67ddacc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_overwrite.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(map_flags, BPF_F_RB_OVERWRITE);
+} ringbuf SEC(".maps");
+
+int pid;
+
+const volatile unsigned long LEN1;
+const volatile unsigned long LEN2;
+const volatile unsigned long LEN3;
+const volatile unsigned long LEN4;
+const volatile unsigned long LEN5;
+
+long reserve1_fail = 0;
+long reserve2_fail = 0;
+long reserve3_fail = 0;
+long reserve4_fail = 0;
+long reserve5_fail = 0;
+
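+/* Ring buffer state queried at the end of the test and read back by user space. */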
+unsigned long avail_data = 0;
+unsigned long ring_size = 0;
+unsigned long cons_pos = 0;
+unsigned long prod_pos = 0;
+unsigned long over_pos = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_overwrite_ringbuf(void *ctx)
+{
+ char *rec1, *rec2, *rec3, *rec4, *rec5;
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != pid)
+ return 0;
+
+ rec1 = bpf_ringbuf_reserve(&ringbuf, LEN1, 0);
+ if (!rec1) {
+ reserve1_fail = 1;
+ return 0;
+ }
+
+ rec2 = bpf_ringbuf_reserve(&ringbuf, LEN2, 0);
+ if (!rec2) {
+ bpf_ringbuf_discard(rec1, 0);
+ reserve2_fail = 1;
+ return 0;
+ }
+
+ rec3 = bpf_ringbuf_reserve(&ringbuf, LEN3, 0);
+ /* expect failure */
+ if (!rec3) {
+ reserve3_fail = 1;
+ } else {
+ bpf_ringbuf_discard(rec1, 0);
+ bpf_ringbuf_discard(rec2, 0);
+ bpf_ringbuf_discard(rec3, 0);
+ return 0;
+ }
+
+ rec4 = bpf_ringbuf_reserve(&ringbuf, LEN4, 0);
+ if (!rec4) {
+ reserve4_fail = 1;
+ bpf_ringbuf_discard(rec1, 0);
+ bpf_ringbuf_discard(rec2, 0);
+ return 0;
+ }
+
+ bpf_ringbuf_submit(rec1, 0);
+ bpf_ringbuf_submit(rec2, 0);
+ bpf_ringbuf_submit(rec4, 0);
+
+ rec5 = bpf_ringbuf_reserve(&ringbuf, LEN5, 0);
+ if (!rec5) {
+ reserve5_fail = 1;
+ return 0;
+ }
+
+ for (int i = 0; i < LEN3; i++)
+ rec5[i] = 0xdd;
+
+ bpf_ringbuf_submit(rec5, 0);
+
+ ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+ avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+ cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+ prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+ over_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_OVERWRITE_POS);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
index 350513c0e4c9..f063a0013f85 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -26,11 +26,11 @@ int test_ringbuf_write(void *ctx)
if (cur_pid != pid)
return 0;
- sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample1)
return 0;
/* first one can pass */
- sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x30000, 0);
if (!sample2) {
bpf_ringbuf_discard(sample1, 0);
__sync_fetch_and_add(&discarded, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 5eb25c6ad75b..a5be3267dbb0 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2018 Facebook */
-#include <stdlib.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
diff --git a/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
new file mode 100644
index 000000000000..6a612cf168d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_set_remove_xattr.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+
+const char xattr_foo[] = "security.bpf.foo";
+const char xattr_bar[] = "security.bpf.bar";
+static const char xattr_selinux[] = "security.selinux";
+char value_bar[] = "world";
+char read_value[32];
+
+bool set_security_bpf_bar_success;
+bool remove_security_bpf_bar_success;
+bool set_security_selinux_fail;
+bool remove_security_selinux_fail;
+
+char name_buf[32];
+
+static inline bool name_match_foo(const char *name)
+{
+ bpf_probe_read_kernel(name_buf, sizeof(name_buf), name);
+
+ return !bpf_strncmp(name_buf, sizeof(xattr_foo), xattr_foo);
+}
+
+/* Test bpf_set_dentry_xattr and bpf_remove_dentry_xattr */
+SEC("lsm.s/inode_getxattr")
+int BPF_PROG(test_inode_getxattr, struct dentry *dentry, char *name)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
+
+bool locked_set_security_bpf_bar_success;
+bool locked_remove_security_bpf_bar_success;
+bool locked_set_security_selinux_fail;
+bool locked_remove_security_selinux_fail;
+
+/* Test bpf_set_dentry_xattr_locked and bpf_remove_dentry_xattr_locked.
+ * It is not necessary to differentiate between the _locked and
+ * non-_locked versions in the BPF program. The verifier will fix them
+ * up properly.
+ */
+SEC("lsm.s/inode_setxattr")
+int BPF_PROG(test_inode_setxattr, struct mnt_idmap *idmap,
+ struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* Only do the following for security.bpf.foo */
+ if (!name_match_foo(name))
+ return 0;
+
+ bpf_dynptr_from_mem(read_value, sizeof(read_value), 0, &value_ptr);
+
+ /* read security.bpf.bar */
+ ret = bpf_get_dentry_xattr(dentry, xattr_bar, &value_ptr);
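+/* Overwrite the traced task's scratch and argument registers with the
+ * values user space stored in the global 'regs'.
+ */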
+
+ if (ret < 0) {
+ /* If security.bpf.bar doesn't exist, set it */
+ bpf_dynptr_from_mem(value_bar, sizeof(value_bar), 0, &value_ptr);
+
+ ret = bpf_set_dentry_xattr(dentry, xattr_bar, &value_ptr, 0);
+ if (!ret)
+ locked_set_security_bpf_bar_success = true;
+ ret = bpf_set_dentry_xattr(dentry, xattr_selinux, &value_ptr, 0);
+ if (ret)
+ locked_set_security_selinux_fail = true;
+ } else {
+ /* If security.bpf.bar exists, remove it */
+ ret = bpf_remove_dentry_xattr(dentry, xattr_bar);
+ if (!ret)
+ locked_remove_security_bpf_bar_success = true;
+
+ ret = bpf_remove_dentry_xattr(dentry, xattr_selinux);
+ if (ret)
+ locked_remove_security_selinux_fail = true;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
index 8ef6b39335b6..34b30e2603f0 100644
--- a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -40,7 +40,7 @@ char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
__u32 monitored_pid;
char sig[MAX_SIG_SIZE];
__u32 sig_size;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
SEC("lsm.s/file_open")
int BPF_PROG(test_file_open, struct file *f)
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
new file mode 100644
index 000000000000..1c7941a4ad00
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_change_tail.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 ByteDance */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+long change_tail_ret = 1;
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ char *data, *data_end;
+
+ bpf_skb_pull_data(skb, 1);
+ data = (char *)(unsigned long)skb->data;
+ data_end = (char *)(unsigned long)skb->data_end;
+
+ if (data + 1 > data_end)
+ return SK_PASS;
+
+ if (data[0] == 'T') { /* Trim the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, skb->len - 1, 0);
+ return SK_PASS;
+ } else if (data[0] == 'G') { /* Grow the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, skb->len + 1, 0);
+ return SK_PASS;
+ } else if (data[0] == 'E') { /* Error */
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
+ return SK_PASS;
+ }
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
new file mode 100644
index 000000000000..83df4919c224
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_ktls.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+int cork_byte;
+int push_start;
+int push_end;
+int apply_bytes;
+int pop_start;
+int pop_end;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
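+/* Apply the cork/push/pop parameters provided by user space to each message. */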
+SEC("sk_msg")
+int prog_sk_policy(struct sk_msg_md *msg)
+{
+ if (cork_byte > 0)
+ bpf_msg_cork_bytes(msg, cork_byte);
+ if (push_start > 0 && push_end > 0)
+ bpf_msg_push_data(msg, push_start, push_end, 0);
+ if (pop_start >= 0 && pop_end > 0)
+ bpf_msg_pop_data(msg, pop_start, pop_end, 0);
+
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_sk_policy_redir(struct sk_msg_md *msg)
+{
+ int two = 2;
+
+ bpf_msg_apply_bytes(msg, apply_bytes);
+ return bpf_msg_redirect_map(msg, &sock_map, two, 0);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_redir.c b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
new file mode 100644
index 000000000000..34d9f4f2f0a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_redir.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map, sock_map;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_hash, sock_hash;
+
+SEC(".maps") struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, unsigned int);
+} verdict_map;
+
+/* Set by user space */
+int redirect_type;
+int redirect_flags;
+
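+/* Select the skb or msg flavor of the redirect helper at compile time,
+ * based on the type of the program context.
+ */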
+#define redirect_map(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_map, \
+ struct sk_msg_md * : bpf_msg_redirect_map \
+ )((__data), &sock_map, (__u32){0}, redirect_flags)
+
+#define redirect_hash(__data) \
+ _Generic((__data), \
+ struct __sk_buff * : bpf_sk_redirect_hash, \
+ struct sk_msg_md * : bpf_msg_redirect_hash \
+ )((__data), &sock_hash, &(__u32){0}, redirect_flags)
+
+#define DEFINE_PROG(__type, __param) \
+SEC("sk_" XSTR(__type)) \
+int prog_ ## __type ## _verdict(__param data) \
+{ \
+ unsigned int *count; \
+ int verdict; \
+ \
+ if (redirect_type == BPF_MAP_TYPE_SOCKMAP) \
+ verdict = redirect_map(data); \
+ else if (redirect_type == BPF_MAP_TYPE_SOCKHASH) \
+ verdict = redirect_hash(data); \
+ else \
+ verdict = redirect_type - __MAX_BPF_MAP_TYPE; \
+ \
+ count = bpf_map_lookup_elem(&verdict_map, &verdict); \
+ if (count) \
+ (*count)++; \
+ \
+ return verdict; \
+}
+
+DEFINE_PROG(skb, struct __sk_buff *);
+DEFINE_PROG(msg, struct sk_msg_md *);
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_strp.c b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
new file mode 100644
index 000000000000..dde3d5bec515
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_strp.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+int verdict_max_size = 10000;
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map SEC(".maps");
+
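+/* Redirect to the socket at index 1 in sock_map unless the record
+ * exceeds verdict_max_size.
+ */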
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ __u32 one = 1;
+
+ if (skb->len > verdict_max_size)
+ return SK_PASS;
+
+ return bpf_sk_redirect_map(skb, &sock_map, one, 0);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_pass(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser_partial(struct __sk_buff *skb)
+{
+	/* Agreement with the test program on a 4-byte size header
+	 * and a 6-byte body.
+ */
+ if (skb->len < 4) {
+ /* need more header to determine full length */
+ return 0;
+ }
+	/* Return the full length decoded from the header. The return
+	 * value may be larger than skb->len, which means the strparser
+	 * framework must wait for the rest of the body to arrive.
+ */
+ return 10;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
index 1c8b678e2e9a..f678ee6bd7ea 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -245,4 +245,73 @@ int lock_global_subprog_call2(struct __sk_buff *ctx)
return ret;
}
+int __noinline
+global_subprog_int(int i)
+{
+ if (i)
+ bpf_printk("%p", &i);
+ return i;
+}
+
+int __noinline
+global_sleepable_helper_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user(&i, sizeof(i), NULL);
+ return i;
+}
+
+int __noinline
+global_sleepable_kfunc_subprog(int i)
+{
+ if (i)
+ bpf_copy_from_user_str(&i, sizeof(i), NULL, 0);
+ global_subprog_int(i);
+ return i;
+}
+
+int __noinline
+global_subprog_calling_sleepable_global(int i)
+{
+ if (!i)
+ global_sleepable_kfunc_subprog(i);
+ return i;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_helper_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_helper_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_kfunc_subprog(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_sleepable_kfunc_subprog(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?syscall")
+int lock_global_sleepable_subprog_indirect(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog_calling_sleepable_global(ctx->mark);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
index e2a21fbd4e44..dcac69f5928a 100644
--- a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -21,7 +21,7 @@ static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -31,7 +31,7 @@ int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
@@ -41,7 +41,7 @@ int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
SEC("fexit/bpf_testmod_return_ptr")
int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
{
- *(volatile long *)ret;
+ *(volatile int *)ret;
*(volatile int *)&ret->f_mode;
bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
triggered++;
diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c
new file mode 100644
index 000000000000..fffafc013044
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_local_data.bpf.h"
+
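+/* Keys for the task local data entries exercised by the test. */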
+struct tld_keys {
+ tld_key_t value0;
+ tld_key_t value1;
+ tld_key_t value2;
+ tld_key_t value_not_exist;
+};
+
+struct test_tld_struct {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+ __u64 d;
+};
+
+int test_value0;
+int test_value1;
+struct test_tld_struct test_value2;
+
+SEC("syscall")
+int task_main(void *ctx)
+{
+ struct tld_object tld_obj;
+ struct test_tld_struct *struct_p;
+ struct task_struct *task;
+ int err, *int_p;
+
+ task = bpf_get_current_task_btf();
+ err = tld_object_init(task, &tld_obj);
+ if (err)
+ return 1;
+
+ int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int));
+ if (int_p)
+ test_value0 = *int_p;
+ else
+ return 2;
+
+ int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int));
+ if (int_p)
+ test_value1 = *int_p;
+ else
+ return 3;
+
+ struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct));
+ if (struct_p)
+ test_value2 = *struct_p;
+ else
+ return 4;
+
+ int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int));
+ if (int_p)
+ return 5;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
index 7e750309ce27..0b74b8bd22e8 100644
--- a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -49,7 +49,7 @@ out:
}
SEC("lsm.s/bpf")
-int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct cgroup *cgrp = NULL;
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_change_tail.c b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
new file mode 100644
index 000000000000..fcba8299f0bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_change_tail.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+#endif
+#define BPF_SKB_MAX_LEN (PAGE_SIZE << 2)
+
+long change_tail_ret = 1;
+
+static __always_inline struct iphdr *parse_ip_header(struct __sk_buff *skb, int *ip_proto)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth = data;
+ struct iphdr *iph;
+
+ /* Verify Ethernet header */
+ if ((void *)(data + sizeof(*eth)) > data_end)
+ return NULL;
+
+ /* Skip Ethernet header to get to IP header */
+ iph = (void *)(data + sizeof(struct ethhdr));
+
+ /* Verify IP header */
+ if ((void *)(data + sizeof(struct ethhdr) + sizeof(*iph)) > data_end)
+ return NULL;
+
+ /* Basic IP header validation */
+ if (iph->version != 4) /* Only support IPv4 */
+ return NULL;
+
+ if (iph->ihl < 5) /* Minimum IP header length */
+ return NULL;
+
+ *ip_proto = iph->protocol;
+ return iph;
+}
+
+static __always_inline struct udphdr *parse_udp_header(struct __sk_buff *skb, struct iphdr *iph)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *hdr = (void *)iph;
+ struct udphdr *udp;
+
+ /* Calculate UDP header position */
+ udp = hdr + (iph->ihl * 4);
+ hdr = (void *)udp;
+
+ /* Verify UDP header bounds */
+ if ((void *)(hdr + sizeof(*udp)) > data_end)
+ return NULL;
+
+ return udp;
+}
+
+SEC("tc/ingress")
+int change_tail(struct __sk_buff *skb)
+{
+ int len = skb->len;
+ struct udphdr *udp;
+ struct iphdr *iph;
+ void *data_end;
+ char *payload;
+ int ip_proto;
+
+ bpf_skb_pull_data(skb, len);
+
+ data_end = (void *)(long)skb->data_end;
+ iph = parse_ip_header(skb, &ip_proto);
+ if (!iph)
+ return TCX_PASS;
+
+ if (ip_proto != IPPROTO_UDP)
+ return TCX_PASS;
+
+ udp = parse_udp_header(skb, iph);
+ if (!udp)
+ return TCX_PASS;
+
+ payload = (char *)udp + (sizeof(struct udphdr));
+ if (payload + 1 > (char *)data_end)
+ return TCX_PASS;
+
+ if (payload[0] == 'T') { /* Trim the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, len - 1, 0);
+ if (!change_tail_ret)
+ bpf_skb_change_tail(skb, len, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'G') { /* Grow the packet */
+ change_tail_ret = bpf_skb_change_tail(skb, len + 1, 0);
+ if (!change_tail_ret)
+ bpf_skb_change_tail(skb, len, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'E') { /* Error */
+ change_tail_ret = bpf_skb_change_tail(skb, BPF_SKB_MAX_LEN, 0);
+ return TCX_PASS;
+ } else if (payload[0] == 'Z') { /* Zero */
+ change_tail_ret = bpf_skb_change_tail(skb, 0, 0);
+ return TCX_PASS;
+ }
+ return TCX_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index 950a70b61e74..4f6f03122d61 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -14,7 +14,6 @@
#define TIME_HORIZON_NS (2000 * 1000 * 1000)
#define NS_PER_SEC 1000000000
#define ECN_HORIZON_NS 5000000
-#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used */
struct {
@@ -24,12 +23,13 @@ struct {
__uint(max_entries, 1);
} flow_map SEC(".maps");
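+/* Throttle rate in bytes per second; replaces the old THROTTLE_RATE_BPS constant. */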
+__uint64_t target_rate;
+
static inline int throttle_flow(struct __sk_buff *skb)
{
int key = 0;
uint64_t *last_tstamp = bpf_map_lookup_elem(&flow_map, &key);
- uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC /
- THROTTLE_RATE_BPS;
+ uint64_t delay_ns = ((uint64_t)skb->len) * NS_PER_SEC / target_rate;
uint64_t now = bpf_ktime_get_ns();
uint64_t tstamp, next_tstamp = 0;
@@ -70,7 +70,7 @@ static inline int handle_tcp(struct __sk_buff *skb, struct tcphdr *tcp)
if ((void *)(tcp + 1) > data_end)
return TC_ACT_SHOT;
- if (tcp->dest == bpf_htons(9000))
+ if (tcp->source == bpf_htons(9000))
return throttle_flow(skb);
return TC_ACT_OK;
@@ -99,7 +99,8 @@ static inline int handle_ipv4(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("cls_test") int tc_prog(struct __sk_buff *skb)
+SEC("tc")
+int tc_prog(struct __sk_buff *skb)
{
if (skb->protocol == bpf_htons(ETH_P_IP))
return handle_ipv4(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 10d825928499..630f12e51b07 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -8,6 +8,7 @@
#include <linux/if_packet.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
@@ -27,6 +28,7 @@ bool seen_host;
bool seen_mcast;
int mark, prio;
+unsigned short headroom, tailroom;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
@@ -104,11 +106,24 @@ out:
return TCX_PASS;
}
+struct sk_buff {
+ struct net_device *dev;
+};
+
+struct net_device {
+ unsigned short needed_headroom;
+ unsigned short needed_tailroom;
+};
+
SEC("tc/egress")
int tc8(struct __sk_buff *skb)
{
+ struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev);
+
seen_tc8 = true;
mark = skb->mark;
prio = skb->priority;
+ headroom = BPF_CORE_READ(dev, needed_headroom);
+ tailroom = BPF_CORE_READ(dev, needed_tailroom);
return TCX_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 404124a93892..7330c61b5730 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -2,23 +2,11 @@
/* In-place tunneling */
-#include <stdbool.h>
-#include <string.h>
-
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/mpls.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/pkt_cls.h>
-#include <linux/types.h>
+#include <vmlinux.h>
-#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
#include "bpf_compiler.h"
#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
@@ -27,6 +15,14 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define ETH_P_MPLS_UC 0x8847
+#define ETH_P_TEB 0x6558
+
+#define MPLS_LS_S_MASK 0x00000100
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) \
+ (((__u64)len & BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
+
#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
#define UDP_PORT 5555
@@ -36,10 +32,9 @@ static const int cfg_udp_src = 20000;
#define EXTPROTO_VXLAN 0x1
-#define VXLAN_N_VID (1u << 24)
-#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
-#define VXLAN_FLAGS 0x8
-#define VXLAN_VNI 1
+#define VXLAN_FLAGS bpf_htonl(1<<27)
+#define VNI_ID 1
+#define VXLAN_VNI bpf_htonl(VNI_ID << 8)
#ifndef NEXTHDR_DEST
#define NEXTHDR_DEST 60
@@ -48,12 +43,6 @@ static const int cfg_udp_src = 20000;
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
-
-struct vxlanhdr {
- __be32 vx_flags;
- __be32 vx_vni;
-} __attribute__((packed));
-
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -94,8 +83,8 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto, __u16 ext_proto)
{
+ struct iphdr iph_inner = {0};
__u16 udp_dst = UDP_PORT;
- struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
@@ -122,7 +111,6 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
/* Derive the IPv4 header fields from the IPv6 header */
- memset(&iph_inner, 0, sizeof(iph_inner));
iph_inner.version = 4;
iph_inner.ihl = 5;
iph_inner.tot_len = bpf_htons(sizeof(iph6_inner) +
@@ -210,7 +198,7 @@ static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
- vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+ vxlan_hdr->vx_vni = VXLAN_VNI;
l2_hdr += sizeof(struct vxlanhdr);
}
@@ -340,7 +328,7 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
vxlan_hdr->vx_flags = VXLAN_FLAGS;
- vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+ vxlan_hdr->vx_vni = VXLAN_VNI;
l2_hdr += sizeof(struct vxlanhdr);
}
@@ -372,8 +360,8 @@ static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
static int encap_ipv6_ipip6(struct __sk_buff *skb)
{
+ struct v6hdr h_outer = {0};
struct iphdr iph_inner;
- struct v6hdr h_outer;
struct tcphdr tcph;
struct ethhdr eth;
__u64 flags;
@@ -400,13 +388,12 @@ static int encap_ipv6_ipip6(struct __sk_buff *skb)
return TC_ACT_SHOT;
/* prepare new outer network header */
- memset(&h_outer.ip, 0, sizeof(h_outer.ip));
h_outer.ip.version = 6;
h_outer.ip.hop_limit = iph_inner.ttl;
- h_outer.ip.saddr.s6_addr[1] = 0xfd;
- h_outer.ip.saddr.s6_addr[15] = 1;
- h_outer.ip.daddr.s6_addr[1] = 0xfd;
- h_outer.ip.daddr.s6_addr[15] = 2;
+ h_outer.ip.saddr.in6_u.u6_addr8[1] = 0xfd;
+ h_outer.ip.saddr.in6_u.u6_addr8[15] = 1;
+ h_outer.ip.daddr.in6_u.u6_addr8[1] = 0xfd;
+ h_outer.ip.daddr.in6_u.u6_addr8[15] = 2;
h_outer.ip.payload_len = iph_inner.tot_len;
h_outer.ip.nexthdr = IPPROTO_IPIP;
@@ -431,7 +418,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return __encap_ipv6(skb, encap_proto, l2_proto, 0);
}
-SEC("encap_ipip_none")
+SEC("tc")
int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -440,7 +427,7 @@ int __encap_ipip_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_none")
+SEC("tc")
int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -449,7 +436,7 @@ int __encap_gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_mpls")
+SEC("tc")
int __encap_gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -458,7 +445,7 @@ int __encap_gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_gre_eth")
+SEC("tc")
int __encap_gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -467,7 +454,7 @@ int __encap_gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_none")
+SEC("tc")
int __encap_udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -476,7 +463,7 @@ int __encap_udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_mpls")
+SEC("tc")
int __encap_udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -485,7 +472,7 @@ int __encap_udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_udp_eth")
+SEC("tc")
int __encap_udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -494,7 +481,7 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_vxlan_eth")
+SEC("tc")
int __encap_vxlan_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -505,7 +492,7 @@ int __encap_vxlan_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_sit_none")
+SEC("tc")
int __encap_sit_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -514,7 +501,7 @@ int __encap_sit_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6tnl_none")
+SEC("tc")
int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -523,7 +510,7 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ipip6_none")
+SEC("tc")
int __encap_ipip6_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
@@ -532,7 +519,7 @@ int __encap_ipip6_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_none")
+SEC("tc")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -541,7 +528,7 @@ int __encap_ip6gre_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_mpls")
+SEC("tc")
int __encap_ip6gre_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -550,7 +537,7 @@ int __encap_ip6gre_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6gre_eth")
+SEC("tc")
int __encap_ip6gre_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -559,7 +546,7 @@ int __encap_ip6gre_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_none")
+SEC("tc")
int __encap_ip6udp_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -568,7 +555,7 @@ int __encap_ip6udp_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_mpls")
+SEC("tc")
int __encap_ip6udp_mpls(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -577,7 +564,7 @@ int __encap_ip6udp_mpls(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6udp_eth")
+SEC("tc")
int __encap_ip6udp_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -586,7 +573,7 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("encap_ip6vxlan_eth")
+SEC("tc")
int __encap_ip6vxlan_eth(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
@@ -693,7 +680,7 @@ static int decap_ipv6(struct __sk_buff *skb)
iph_outer.nexthdr);
}
-SEC("decap")
+SEC("tc")
int decap_f(struct __sk_buff *skb)
{
switch (skb->protocol) {
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index eb5cca1fce16..7d5293de1952 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -294,7 +294,9 @@ static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
(ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
goto err;
- if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ if (!ctx->attrs.wscale_ok ||
+ !ctx->attrs.snd_wscale ||
+ ctx->attrs.snd_wscale >= BPF_SYNCOOKIE_WSCALE_MASK)
goto err;
if (!ctx->attrs.tstamp_ok)
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
index 5f4e87ee949a..1ecdf4c54de4 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -14,10 +14,7 @@
#include <bpf/bpf_endian.h>
#define BPF_PROG_TEST_TCP_HDR_OPTIONS
#include "test_tcp_hdr_options.h"
-
-#ifndef sizeof_field
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#endif
+#include "bpf_misc.h"
__u8 test_kind = TCPOPT_EXP;
__u16 test_magic = 0xeB9F;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index 540181c115a8..ef00d38b0a8d 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -23,7 +23,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(max_entries, 2);
__type(key, int);
__type(value, __u32);
} perf_event_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
index 5aaf2b065f86..cf0547a613ff 100644
--- a/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
+++ b/tools/testing/selftests/bpf/progs/test_tp_btf_nullable.c
@@ -3,21 +3,17 @@
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
#include "bpf_misc.h"
-SEC("tp_btf/bpf_testmod_test_nullable_bare")
-/* This used to be a failure test, but raw_tp nullable arguments can now
- * directly be dereferenced, whether they have nullable annotation or not,
- * and don't need to be explicitly checked.
- */
-__success
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
+__failure __msg("R1 invalid mem access 'trusted_ptr_or_null_'")
int BPF_PROG(handle_tp_btf_nullable_bare1, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
return nullable_ctx->len;
}
-SEC("tp_btf/bpf_testmod_test_nullable_bare")
+SEC("tp_btf/bpf_testmod_test_nullable_bare_tp")
int BPF_PROG(handle_tp_btf_nullable_bare2, struct bpf_testmod_test_read_ctx *nullable_ctx)
{
if (nullable_ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
index 896c88a4960d..12f4065fca20 100644
--- a/tools/testing/selftests/bpf/progs/test_uprobe.c
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -59,3 +59,41 @@ int BPF_UPROBE(test4)
test4_result = 1;
return 0;
}
+
+#if defined(__TARGET_ARCH_x86)
+struct pt_regs regs;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ax = regs.ax;
+ ctx->cx = regs.cx;
+ ctx->dx = regs.dx;
+ ctx->r8 = regs.r8;
+ ctx->r9 = regs.r9;
+ ctx->r10 = regs.r10;
+ ctx->r11 = regs.r11;
+ ctx->di = regs.di;
+ ctx->si = regs.si;
+ return 0;
+}
+
+unsigned long ip;
+
+SEC("uprobe")
+int BPF_UPROBE(test_regs_change_ip)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ ctx->ip = ip;
+ return 0;
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
index 505aab9a5234..a78c87537b07 100644
--- a/tools/testing/selftests/bpf/progs/test_usdt.c
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -11,6 +11,7 @@ int usdt0_called;
u64 usdt0_cookie;
int usdt0_arg_cnt;
int usdt0_arg_ret;
+int usdt0_arg_size;
SEC("usdt")
int usdt0(struct pt_regs *ctx)
@@ -26,6 +27,7 @@ int usdt0(struct pt_regs *ctx)
usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
/* should return -ENOENT for any arg_num */
usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ usdt0_arg_size = bpf_usdt_arg_size(ctx, bpf_get_prandom_u32());
return 0;
}
@@ -34,6 +36,7 @@ u64 usdt3_cookie;
int usdt3_arg_cnt;
int usdt3_arg_rets[3];
u64 usdt3_args[3];
+int usdt3_arg_sizes[3];
SEC("usdt//proc/self/exe:test:usdt3")
int usdt3(struct pt_regs *ctx)
@@ -50,12 +53,15 @@ int usdt3(struct pt_regs *ctx)
usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
usdt3_args[0] = (int)tmp;
+ usdt3_arg_sizes[0] = bpf_usdt_arg_size(ctx, 0);
usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
usdt3_args[1] = (long)tmp;
+ usdt3_arg_sizes[1] = bpf_usdt_arg_size(ctx, 1);
usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
usdt3_args[2] = (uintptr_t)tmp;
+ usdt3_arg_sizes[2] = bpf_usdt_arg_size(ctx, 2);
return 0;
}
@@ -64,12 +70,15 @@ int usdt12_called;
u64 usdt12_cookie;
int usdt12_arg_cnt;
u64 usdt12_args[12];
+int usdt12_arg_sizes[12];
SEC("usdt//proc/self/exe:test:usdt12")
int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
long a6, __u64 a7, uintptr_t a8, int a9, short a10,
short a11, signed char a12)
{
+ int i;
+
if (my_pid != (bpf_get_current_pid_tgid() >> 32))
return 0;
@@ -90,6 +99,42 @@ int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
usdt12_args[9] = a10;
usdt12_args[10] = a11;
usdt12_args[11] = a12;
+
+ bpf_for(i, 0, 12) {
+ usdt12_arg_sizes[i] = bpf_usdt_arg_size(ctx, i);
+ }
+
+ return 0;
+}
+
+int usdt_sib_called;
+u64 usdt_sib_cookie;
+int usdt_sib_arg_cnt;
+int usdt_sib_arg_ret;
+short usdt_sib_arg;
+int usdt_sib_arg_size;
+
+/*
+ * usdt_sib is only tested on x86-related architectures, so it requires
+ * manual attach since auto-attach will panic tests under other architectures
+ */
+SEC("usdt")
+int usdt_sib(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_sib_called, 1);
+
+ usdt_sib_cookie = bpf_usdt_cookie(ctx);
+ usdt_sib_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt_sib_arg_ret = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt_sib_arg = (short)tmp;
+ usdt_sib_arg_size = bpf_usdt_arg_size(ctx, 0);
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
index 12034a73ee2d..ff8d755548b9 100644
--- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -17,7 +17,7 @@
#define MAX_SIG_SIZE 1024
__u32 monitored_pid;
-__u32 user_keyring_serial;
+__s32 user_keyring_serial;
__u64 system_keyring_id;
struct data {
@@ -37,7 +37,7 @@ struct {
char _license[] SEC("license") = "GPL";
SEC("lsm.s/bpf")
-int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size, bool kernel)
{
struct bpf_dynptr data_ptr, sig_ptr;
struct data *data_val;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 81bb38d72ced..5904f45cfbc4 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -10,6 +10,8 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
#if defined(__TARGET_ARCH_s390)
int tailroom = 512;
+#elif defined(__TARGET_ARCH_powerpc)
+ int tailroom = 384;
#else
int tailroom = 320;
#endif
@@ -17,7 +19,9 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
/* Data length determine test case */
if (data_len == 54) { /* sizeof(pkt_v4) */
- offset = 4096; /* test too large offset */
+ offset = 4096; /* test too large offset, 4k page size */
+ } else if (data_len == 53) { /* sizeof(pkt_v4) - 1 */
+ offset = 65536; /* test too large offset, 64k page size */
} else if (data_len == 74) { /* sizeof(pkt_v6) */
offset = 40;
} else if (data_len == 64) {
@@ -29,6 +33,10 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
offset = 10;
} else if (data_len == 9001) {
offset = 4096;
+ } else if (data_len == 90000) {
+ offset = 10; /* test a small offset, 64k page size */
+ } else if (data_len == 90001) {
+ offset = 65536; /* test too large offset, 64k page size */
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
new file mode 100644
index 000000000000..814e2a980e97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_tailcall.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("xdp")
+int xdp_devmap(struct xdp_md *ctx)
+{
+ return ctx->egress_ifindex;
+}
+
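+/* Prog array with the devmap program as its only entry, reached via tail call. */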
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} xdp_map SEC(".maps") = {
+ .values = {
+ [0] = (void *)&xdp_devmap,
+ },
+};
+
+SEC("xdp")
+int xdp_entry(struct xdp_md *ctx)
+{
+ bpf_tail_call(ctx, &xdp_map, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
index 3abf068b8446..5928ed0911ca 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -98,6 +98,18 @@ int xdp_count_pkts(struct xdp_md *xdp)
return XDP_DROP;
}
+SEC("xdp")
+int xdp_redirect_to_111(struct xdp_md *xdp)
+{
+ return bpf_redirect(111, 0);
+}
+
+SEC("xdp")
+int xdp_redirect_to_222(struct xdp_md *xdp)
+{
+ return bpf_redirect(222, 0);
+}
+
SEC("tc")
int tc_count_pkts(struct __sk_buff *skb)
{
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index a7c4a7d49fe6..0a0f371a2dec 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -1,40 +1,320 @@
+#include <stdbool.h>
#include <linux/bpf.h>
+#include <linux/errno.h>
#include <linux/if_ether.h>
#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_kfuncs.h"
+
+#define META_SIZE 32
-#define __round_mask(x, y) ((__typeof__(x))((y) - 1))
-#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
-SEC("t")
+/* Demonstrate passing metadata from XDP to TC using bpf_xdp_adjust_meta.
+ *
+ * The XDP program extracts a fixed-size payload following the Ethernet header
+ * and stores it as packet metadata to test the driver's metadata support. The
+ * TC program then verifies that the passed metadata is correct.
+ */
+
+bool test_pass;
+
+static const __u8 smac_want[ETH_ALEN] = {
+ 0x12, 0x34, 0xDE, 0xAD, 0xBE, 0xEF,
+};
+
+static const __u8 meta_want[META_SIZE] = {
+ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
+ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
+ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
+ 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
+};
+
+static bool check_smac(const struct ethhdr *eth)
+{
+ return !__builtin_memcmp(eth->h_source, smac_want, ETH_ALEN);
+}
+
+static bool check_metadata(const char *file, int line, __u8 *meta_have)
+{
+ if (!__builtin_memcmp(meta_have, meta_want, META_SIZE))
+ return true;
+
+ bpf_stream_printk(BPF_STREAM_STDERR,
+ "FAIL:%s:%d: metadata mismatch\n"
+ " have:\n %pI6\n %pI6\n"
+ " want:\n %pI6\n %pI6\n",
+ file, line,
+ &meta_have[0x00], &meta_have[0x10],
+ &meta_want[0x00], &meta_want[0x10]);
+ return false;
+}
+
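+/* Wrap the checker so call sites report their own file and line on a mismatch. */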
+#define check_metadata(meta_have) check_metadata(__FILE__, __LINE__, meta_have)
+
+static bool check_skb_metadata(const char *file, int line, struct __sk_buff *skb)
+{
+ __u8 *data_meta = ctx_ptr(skb, data_meta);
+ __u8 *data = ctx_ptr(skb, data);
+
+ return data_meta + META_SIZE <= data && (check_metadata)(file, line, data_meta);
+}
+
+#define check_skb_metadata(skb) check_skb_metadata(__FILE__, __LINE__, skb)
+
+SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
- __u8 *data, *data_meta, *data_end;
- __u32 diff = 0;
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ __u8 *data = ctx_ptr(ctx, data);
- data_meta = ctx_ptr(ctx, data_meta);
- data_end = ctx_ptr(ctx, data_end);
- data = ctx_ptr(ctx, data);
+ if (meta_have + META_SIZE > data)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Read from metadata using bpf_dynptr_read helper */
+SEC("tc")
+int ing_cls_dynptr_read(struct __sk_buff *ctx)
+{
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using bpf_dynptr_write helper */
+SEC("tc")
+int ing_cls_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
+ return TC_ACT_SHOT;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, META_SIZE, 0);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read from metadata using read-only dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 *meta_have;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ meta_have = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/* Write to metadata using writeable dynptr slice */
+SEC("tc")
+int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 *src, *dst;
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE);
+ if (!src)
return TC_ACT_SHOT;
- diff |= ((__u32 *)data_meta)[0] ^ ((__u32 *)data)[0];
- diff |= ((__u16 *)data_meta)[2] ^ ((__u16 *)data)[2];
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!dst)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(dst, src, META_SIZE);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Read skb metadata in chunks from various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 meta_have[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ dst = meta_have;
+
+ /* 1. Regular read */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 2. Read from an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_read(dst, chunk_len, &meta, 0, 0);
+ dst += chunk_len;
+
+ /* 3. Read at an offset */
+ bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0);
+ dst += chunk_len;
+
+ /* 4. Read from a slice starting at an offset */
+ src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!src)
+ goto out;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ if (!check_metadata(meta_have))
+ goto out;
- return diff ? TC_ACT_SHOT : TC_ACT_OK;
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
}
-SEC("x")
+/* Write skb metadata in chunks at various offsets in different ways. */
+SEC("tc")
+int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx)
+{
+ const __u32 chunk_len = META_SIZE / 4;
+ __u8 payload[META_SIZE];
+ struct bpf_dynptr meta;
+ __u8 *dst, *src;
+
+ bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload));
+ src = payload;
+
+ /* 1. Regular write */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 2. Write to an offset-adjusted dynptr */
+ bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta));
+ bpf_dynptr_write(&meta, 0, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 3. Write at an offset */
+ bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0);
+ src += chunk_len;
+
+ /* 4. Write to a slice starting at an offset */
+ dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len);
+ if (!dst)
+ return TC_ACT_SHOT;
+ __builtin_memcpy(dst, src, chunk_len);
+
+ return TC_ACT_UNSPEC; /* pass */
+}
+
+/* Pass an OOB offset to dynptr read, write, adjust, slice. */
+SEC("tc")
+int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr meta;
+ __u8 md, *p;
+ int err;
+
+ err = bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (err)
+ goto fail;
+
+ /* read offset OOB */
+ err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* write offset OOB */
+ err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0);
+ if (err != -E2BIG)
+ goto fail;
+
+ /* adjust end offset OOB */
+ err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* adjust start offset OOB */
+ err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1);
+ if (err != -ERANGE)
+ goto fail;
+
+ /* slice offset OOB */
+ p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ /* slice rdwr offset OOB */
+ p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p));
+ if (p)
+ goto fail;
+
+ return TC_ACT_UNSPEC;
+fail:
+ return TC_ACT_SHOT;
+}
+
+/* Reserve and clear space for metadata but don't populate it */
+SEC("xdp")
+int ing_xdp_zalloc_meta(struct xdp_md *ctx)
+{
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+ __u8 *meta;
+ int ret;
+
+ /* Drop any non-test packets */
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ return XDP_DROP;
+ if (!check_smac(eth))
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
+ if (ret < 0)
+ return XDP_DROP;
+
+ meta = ctx_ptr(ctx, data_meta);
+ if (meta + META_SIZE > ctx_ptr(ctx, data))
+ return XDP_DROP;
+
+ __builtin_memset(meta, 0, META_SIZE);
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
- __u8 *data, *data_meta, *data_end;
+ __u8 *data, *data_meta, *data_end, *payload;
+ struct ethhdr *eth;
int ret;
- ret = bpf_xdp_adjust_meta(ctx, -round_up(ETH_ALEN, 4));
+ ret = bpf_xdp_adjust_meta(ctx, -META_SIZE);
if (ret < 0)
return XDP_DROP;
@@ -42,12 +322,352 @@ int ing_xdp(struct xdp_md *ctx)
data_end = ctx_ptr(ctx, data_end);
data = ctx_ptr(ctx, data);
- if (data + ETH_ALEN > data_end ||
- data_meta + round_up(ETH_ALEN, 4) > data)
+ eth = (struct ethhdr *)data;
+ payload = data + sizeof(struct ethhdr);
+
+ if (payload + META_SIZE > data_end ||
+ data_meta + META_SIZE > data)
+ return XDP_DROP;
+
+ /* The Linux networking stack may send other packets on the test
+ * interface that interfere with the test. Just drop them.
+ * The test packets can be recognized by their source MAC address.
+ */
+ if (!check_smac(eth))
return XDP_DROP;
- __builtin_memcpy(data_meta, data, ETH_ALEN);
+ __builtin_memcpy(data_meta, payload, META_SIZE);
return XDP_PASS;
}
+/*
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _payload_ using packet pointers.
+ */
+SEC("tc")
+int clone_data_meta_survives_data_write(struct __sk_buff *ctx)
+{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Packet write to trigger unclone in prologue */
+ eth->h_proto = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb->data_meta..skb->data is
+ * kept intact if prog writes to packet _metadata_ using packet pointers.
+ */
+SEC("tc")
+int clone_data_meta_survives_meta_write(struct __sk_buff *ctx)
+{
+ __u8 *meta_have = ctx_ptr(ctx, data_meta);
+ struct ethhdr *eth = ctx_ptr(ctx, data);
+
+ if (eth + 1 > ctx_ptr(ctx, data_end))
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ if (meta_have + META_SIZE > eth)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ /* Metadata write to trigger unclone in prologue */
+ *meta_have = 42;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates a r/w slice to packet _payload_.
+ */
+SEC("tc")
+int clone_meta_dynptr_survives_data_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ struct ethhdr *eth;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, metadata remains intact if
+ * prog creates an r/w slice to packet _metadata_.
+ */
+SEC("tc")
+int clone_meta_dynptr_survives_meta_slice_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ const struct ethhdr *eth;
+ __u8 *meta_have;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ meta_have = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE);
+ if (!meta_have)
+ goto out;
+
+ if (!check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, skb_meta dynptr is read-write
+ * before prog writes to packet _payload_ using dynptr_write helper and metadata
+ * remains intact before and after the write.
+ */
+SEC("tc")
+int clone_meta_dynptr_rw_before_data_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ const struct ethhdr *eth;
+ int err;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ /* Expect read-write metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to payload will unclone the packet */
+ bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+/*
+ * Check that, when operating on a cloned packet, the skb_meta dynptr is
+ * read-write before prog writes to packet _metadata_ using the dynptr_write
+ * helper, and that metadata remains intact before and after the write.
+ */
+SEC("tc")
+int clone_meta_dynptr_rw_before_meta_dynptr_write(struct __sk_buff *ctx)
+{
+ struct bpf_dynptr data, meta;
+ __u8 meta_have[META_SIZE];
+ const struct ethhdr *eth;
+ int err;
+
+ bpf_dynptr_from_skb(ctx, 0, &data);
+ eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth));
+ if (!eth)
+ goto out;
+ /* Ignore non-test packets */
+ if (!check_smac(eth))
+ goto out;
+
+ /* Expect read-write metadata before unclone */
+ bpf_dynptr_from_skb_meta(ctx, 0, &meta);
+ if (bpf_dynptr_is_rdonly(&meta))
+ goto out;
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ /* Helper write to metadata will unclone the packet */
+ bpf_dynptr_write(&meta, 0, &meta_have[0], 1, 0);
+
+ err = bpf_dynptr_read(meta_have, META_SIZE, &meta, 0, 0);
+ if (err || !check_metadata(meta_have))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_vlan_push_pop(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* bpf_skb_vlan_push assumes HW offload for primary VLAN tag. Only
+ * secondary tag push triggers an actual MAC header modification.
+ */
+ err = bpf_skb_vlan_push(ctx, 0, 42);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_push(ctx, 0, 207);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+ err = bpf_skb_vlan_pop(ctx);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_adjust_room(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Grow a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, 1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Shrink a 1 byte hole after the MAC header */
+ err = bpf_skb_adjust_room(ctx, -1, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Grow a 256 byte hole to trigger head reallocation */
+ err = bpf_skb_adjust_room(ctx, 256, BPF_ADJ_ROOM_MAC, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_head_tail(struct __sk_buff *ctx)
+{
+ int err;
+
+ /* Reserve 1 extra byte in the front for packet data */
+ err = bpf_skb_change_head(ctx, 1, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 256 extra bytes in the front to trigger head reallocation */
+ err = bpf_skb_change_head(ctx, 256, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ /* Reserve 4k extra bytes in the back to trigger head reallocation */
+ err = bpf_skb_change_tail(ctx, ctx->len + 4096, 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int helper_skb_change_proto(struct __sk_buff *ctx)
+{
+ int err;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IPV6), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ err = bpf_skb_change_proto(ctx, bpf_htons(ETH_P_IP), 0);
+ if (err)
+ goto out;
+
+ if (!check_skb_metadata(ctx))
+ goto out;
+
+ test_pass = true;
+out:
+ return TC_ACT_SHOT;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
new file mode 100644
index 000000000000..c41a21413eaa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_pull_data.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+int xdpf_sz;
+int sinfo_sz;
+int data_len;
+int pull_len;
+
+#define XDP_PACKET_HEADROOM 256
+
+SEC("xdp.frags")
+int xdp_find_sizes(struct xdp_md *ctx)
+{
+ xdpf_sz = sizeof(struct xdp_frame);
+ sinfo_sz = __PAGE_SIZE - XDP_PACKET_HEADROOM -
+ (ctx->data_end - ctx->data);
+
+ return XDP_PASS;
+}
+
+SEC("xdp.frags")
+int xdp_pull_data_prog(struct xdp_md *ctx)
+{
+ __u8 *data_end = (void *)(long)ctx->data_end;
+ __u8 *data = (void *)(long)ctx->data;
+ __u8 *val_p;
+ int err;
+
+ if (data_len != data_end - data)
+ return XDP_DROP;
+
+ err = bpf_xdp_pull_data(ctx, pull_len);
+ if (err)
+ return XDP_DROP;
+
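+ /* After the pull, the byte at offset 1024 must be in the linear area and
+ * still carry the expected 0xbb marker.
+ */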
+ val_p = (void *)(long)ctx->data + 1024;
+ if (val_p + 1 > (void *)(long)ctx->data_end)
+ return XDP_DROP;
+
+ if (*val_p != 0xbb)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
deleted file mode 100644
index b778cad45485..000000000000
--- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
+++ /dev/null
@@ -1,26 +0,0 @@
-/* Copyright (c) 2017 VMware
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-
-SEC("redirect_to_111")
-int xdp_redirect_to_111(struct xdp_md *xdp)
-{
- return bpf_redirect(111, 0);
-}
-SEC("redirect_to_222")
-int xdp_redirect_to_222(struct xdp_md *xdp)
-{
- return bpf_redirect(222, 0);
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index a7588302268d..a80cc5f2f4f2 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -102,8 +102,8 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
-SEC("xdp_drop_vlan_4011")
-int xdp_prognum0(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_drop_vlan_4011(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -144,8 +144,8 @@ Load prog with ip tool:
/* Changing VLAN to zero, have same practical effect as removing the VLAN. */
#define TO_VLAN 0
-SEC("xdp_vlan_change")
-int xdp_prognum1(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_change(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -178,8 +178,8 @@ int xdp_prognum1(struct xdp_md *ctx)
#endif
#define VLAN_HDR_SZ 4 /* bytes */
-SEC("xdp_vlan_remove_outer")
-int xdp_prognum2(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -224,8 +224,8 @@ void shift_mac_4bytes_32bit(void *data)
p[1] = p[0];
}
-SEC("xdp_vlan_remove_outer2")
-int xdp_prognum3(struct xdp_md *ctx)
+SEC("xdp")
+int xdp_vlan_remove_outer2(struct xdp_md *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
@@ -254,8 +254,8 @@ int xdp_prognum3(struct xdp_md *ctx)
* The TC-clsact eBPF programs (currently) need to be attach via TC commands
*/
-SEC("tc_vlan_push")
-int _tc_progA(struct __sk_buff *ctx)
+SEC("tc")
+int tc_vlan_push(struct __sk_buff *ctx)
{
bpf_skb_vlan_push(ctx, bpf_htons(ETH_P_8021Q), TESTVLAN);
diff --git a/tools/testing/selftests/bpf/progs/timer_interrupt.c b/tools/testing/selftests/bpf/progs/timer_interrupt.c
new file mode 100644
index 000000000000..19180a455f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_interrupt.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define CLOCK_MONOTONIC 1
+
+int preempt_count;
+int in_interrupt;
+int in_interrupt_cb;
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
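+/* Timer callback: record the preempt count and interrupt context observed
+ * while the callback runs.
+ */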
+static int timer_in_interrupt(void *map, int *key, struct bpf_timer *timer)
+{
+ preempt_count = get_preempt_count();
+ in_interrupt_cb = bpf_in_interrupt();
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_timer_interrupt)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&array, &key);
+ if (!timer)
+ return 0;
+
+ in_interrupt = bpf_in_interrupt();
+ bpf_timer_init(timer, &array, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(timer, timer_in_interrupt);
+ bpf_timer_start(timer, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
index d41665d2ec8c..65e485c4468c 100644
--- a/tools/testing/selftests/bpf/progs/tracing_failure.c
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -18,3 +18,15 @@ int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
{
return 0;
}
+
+SEC("?fentry/__rcu_read_lock")
+int BPF_PROG(tracing_deny)
+{
+ return 0;
+}
+
+SEC("?fexit/do_exit")
+int BPF_PROG(fexit_noreturns)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index c435a3a8328a..d460732e2023 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,6 +18,18 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -26,6 +38,9 @@ long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
+long ut1_a_a, ut1_b, ut1_c;
+long ut2_a, ut2_b_a, ut2_b_b;
+
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
{
@@ -130,4 +145,22 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
+SEC("fexit/bpf_testmod_test_union_arg_1")
+int BPF_PROG2(test_union_arg_1, union bpf_testmod_union_arg_1, a, int, b, int, c)
+{
+ ut1_a_a = a.arg.a;
+ ut1_b = b;
+ ut1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_union_arg_2")
+int BPF_PROG2(test_union_arg_2, int, a, union bpf_testmod_union_arg_2, b)
+{
+ ut2_a = a;
+ ut2_b_a = b.arg.a;
+ ut2_b_b = b.arg.b;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 044a6d78923e..2898b3749d07 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -42,12 +42,14 @@ int bench_trigger_uprobe_multi(void *ctx)
const volatile int batch_iters = 0;
SEC("?raw_tp")
-int trigger_count(void *ctx)
+int trigger_kernel_count(void *ctx)
{
int i;
- for (i = 0; i < batch_iters; i++)
+ for (i = 0; i < batch_iters; i++) {
inc_counter();
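+ /* bpf_get_numa_node_id() is also the attach target for the probe benchmarks in this file */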
+ bpf_get_numa_node_id();
+ }
return 0;
}
@@ -97,6 +99,12 @@ int bench_trigger_kprobe_multi(void *ctx)
return 0;
}
+SEC("?kprobe.multi/bpf_get_numa_node_id")
+int bench_kprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
@@ -104,6 +112,12 @@ int bench_trigger_kretprobe_multi(void *ctx)
return 0;
}
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
+int bench_kretprobe_multi_empty(void *ctx)
+{
+ return 0;
+}
+
SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
index 8a403470e557..046a204c8fc6 100644
--- a/tools/testing/selftests/bpf/progs/uninit_stack.c
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -70,7 +70,8 @@ __naked int helper_uninit_to_misc(void *ctx)
r1 = r10; \
r1 += -128; \
r2 = 32; \
- call %[bpf_trace_printk]; \
+ r3 = 0; \
+ call %[bpf_probe_read_user]; \
/* Call to dummy() forces print_verifier_state(..., true), \
* thus showing the stack state, matched by __msg(). \
*/ \
@@ -79,7 +80,7 @@ __naked int helper_uninit_to_misc(void *ctx)
exit; \
"
:
- : __imm(bpf_trace_printk),
+ : __imm(bpf_probe_read_user),
__imm(dummy)
: __clobber_all);
}
diff --git a/tools/testing/selftests/bpf/progs/unsupported_ops.c b/tools/testing/selftests/bpf/progs/unsupported_ops.c
index 9180365a3568..8aa2e0dd624e 100644
--- a/tools/testing/selftests/bpf/progs/unsupported_ops.c
+++ b/tools/testing/selftests/bpf/progs/unsupported_ops.c
@@ -4,7 +4,7 @@
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod.h"
+#include "../test_kmods/bpf_testmod.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
index 8a4fa6c7ef59..e08c31669e5a 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_syscall.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
@@ -7,8 +7,8 @@ struct pt_regs regs;
char _license[] SEC("license") = "GPL";
-SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger")
-int uretprobe(struct pt_regs *ctx)
+SEC("uprobe")
+int probe(struct pt_regs *ctx)
{
__builtin_memcpy(&regs, ctx, sizeof(regs));
return 0;
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
index 0d7f1a7db2e2..915d38591bf6 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
#include <string.h>
struct pt_regs regs;
@@ -8,10 +10,64 @@ struct pt_regs regs;
char _license[] SEC("license") = "GPL";
int executed = 0;
+int pid;
+
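+/* The probes below only count hits from the test process: the upper 32 bits
+ * of bpf_get_current_pid_tgid() hold the tgid, which is compared against pid
+ * (expected to be set by the userspace part of the test).
+ */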
+SEC("uprobe")
+int BPF_UPROBE(test_uprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(test_uretprobe)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.multi")
+int test_uprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
SEC("uretprobe.multi")
-int test(struct pt_regs *regs)
+int test_uretprobe_multi(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("uprobe.session")
+int test_uprobe_session(struct pt_regs *ctx)
{
- executed = 1;
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
+ return 0;
+}
+
+SEC("usdt")
+int test_usdt(struct pt_regs *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ executed++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
index 9fdcf396b8f4..a2951e2f1711 100644
--- a/tools/testing/selftests/bpf/progs/uretprobe_stack.c
+++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
@@ -26,8 +26,8 @@ int usdt_len;
SEC("uprobe//proc/self/exe:target_1")
int BPF_UPROBE(uprobe_1)
{
- /* target_1 is recursive wit depth of 2, so we capture two separate
- * stack traces, depending on which occurence it is
+ /* target_1 is recursive with depth of 2, so we capture two separate
+ * stack traces, depending on which occurrence it is
*/
static bool recur = false;
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
index e97e518516b6..2b4fdca162be 100644
--- a/tools/testing/selftests/bpf/progs/verifier_and.c
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -85,8 +85,14 @@ l0_%=: r0 = r0; \
SEC("socket")
__description("check known subreg with unknown reg")
-__success __failure_unpriv __msg_unpriv("R1 !read_ok")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w0 < 0x1 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R1 !read_ok'` */
+__xlated_unpriv("goto pc-1") /* `r1 = *(u32*)(r1 + 512)`, sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void known_subreg_with_unknown_reg(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 67509c5d3982..7f4827eede3c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -3,6 +3,7 @@
#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
@@ -114,6 +115,111 @@ int basic_alloc3(void *ctx)
return 0;
}
+SEC("syscall")
+__success __retval(0)
+int basic_reserve1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ page += __PAGE_SIZE;
+
+ /* Reserve the second page */
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 2;
+
+ /* Try to explicitly allocate the reserved page. */
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 3;
+
+ /* Try to implicitly allocate the page (since there are only 2 of them). */
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (page)
+ return 4;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_reserve2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
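+ /* Explicitly allocating the reserved page must fail. */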
+ page = bpf_arena_alloc_pages(&arena, page, 1, NUMA_NO_NODE, 0);
+ if ((u64)page)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Reserve the same page twice, should return -EBUSY. */
+SEC("syscall")
+__success __retval(0)
+int reserve_twice(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 1);
+ if (ret != -EBUSY)
+ return 2;
+#endif
+ return 0;
+}
+
+/* Try to reserve past the end of the arena. */
+SEC("syscall")
+__success __retval(0)
+int reserve_invalid_region(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *page;
+ int ret;
+
+ /* Try a NULL pointer. */
+ ret = bpf_arena_reserve_pages(&arena, NULL, 3);
+ if (ret != -EINVAL)
+ return 1;
+
+ page = arena_base(&arena);
+
+ ret = bpf_arena_reserve_pages(&arena, page, 3);
+ if (ret != -EINVAL)
+ return 2;
+
+ ret = bpf_arena_reserve_pages(&arena, page, 4096);
+ if (ret != -EINVAL)
+ return 3;
+
+ ret = bpf_arena_reserve_pages(&arena, page, (1ULL << 32) - 1);
+ if (ret != -EINVAL)
+ return 4;
+#endif
+ return 0;
+}
+
SEC("iter.s/bpf_map")
__success __log_level(2)
int iter_maps1(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index f94f30cf1bb8..f19e15400b3e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -67,6 +67,104 @@ int big_alloc1(void *ctx)
return 0;
}
+/* Try to access a reserved page. Behavior should be identical to accessing unallocated pages. */
+SEC("syscall")
+__success __retval(0)
+int access_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ const size_t len = 4;
+ int ret, i;
+
+ /* Get a separate region of the arena. */
+ page = base = arena_base(&arena) + 16384 * PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base, len);
+ if (ret)
+ return 1;
+
+ /* Try to dirty reserved memory. */
+ for (i = 0; i < len && can_loop; i++)
+ *page = 0x5a;
+
+ for (i = 0; i < len && can_loop; i++) {
+ page = (volatile char __arena *)(base + i * PAGE_SIZE);
+
+ /*
+ * Error out in case either the write went through,
+ * or the address has random garbage.
+ */
+ if (*page == 0x5a)
+ return 2 + 2 * i;
+
+ if (*page)
+ return 2 + 2 * i + 1;
+ }
+#endif
+ return 0;
+}
+
+/* Try to allocate a region overlapping with a reservation. */
+SEC("syscall")
+__success __retval(0)
+int request_partially_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ volatile char __arena *page;
+ char __arena *base;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ page = base = arena_base(&arena) + 4096 * __PAGE_SIZE;
+
+ ret = bpf_arena_reserve_pages(&arena, base + 3 * __PAGE_SIZE, 4);
+ if (ret)
+ return 1;
+
+ page = bpf_arena_alloc_pages(&arena, base, 5, NUMA_NO_NODE, 0);
+ if ((u64)page != 0ULL)
+ return 2;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int free_reserved(void *ctx)
+{
+#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+ char __arena *addr;
+ char __arena *page;
+ int ret;
+
+ /* Add an arbitrary page offset. */
+ addr = arena_base(&arena) + 32768 * __PAGE_SIZE;
+
+ page = bpf_arena_alloc_pages(&arena, addr, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 1;
+
+ ret = bpf_arena_reserve_pages(&arena, addr + 2 * __PAGE_SIZE, 2);
+ if (ret)
+ return 2;
+
+ /*
+ * Reserved and allocated pages should be interchangeable for
+ * bpf_arena_free_pages(). Free a reserved and an allocated
+ * page with a single call.
+ */
+ bpf_arena_free_pages(&arena, addr + __PAGE_SIZE, 2);
+
+ /* The free call above should have succeeded, so this allocation should too. */
+ page = bpf_arena_alloc_pages(&arena, addr + __PAGE_SIZE, 2, NUMA_NO_NODE, 0);
+ if (!page)
+ return 3;
+#endif
+ return 0;
+}
+
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST)
#define PAGE_CNT 100
__u8 __arena * __arena page[PAGE_CNT]; /* occupies the first page */
@@ -142,6 +240,7 @@ int big_alloc2(void *ctx)
return 5;
bpf_arena_free_pages(&arena, (void __arena *)pg, 2);
page[i] = NULL;
+ barrier();
page[i + 1] = NULL;
cond_break;
}
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
index 4195aa824ba5..0a187ff725cc 100644
--- a/tools/testing/selftests/bpf/progs/verifier_array_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -29,6 +29,20 @@ struct {
} map_array_wo SEC(".maps");
struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, struct test_val);
+} map_array_pcpu SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, struct test_val);
+} map_array SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_HASH);
__uint(max_entries, 1);
__type(key, long long);
@@ -525,4 +539,193 @@ l0_%=: exit; \
: __clobber_all);
}
+SEC("socket")
+__description("valid map access into an array using constant without nullness")
+__success __retval(4) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int an_array_with_a_constant_no_nullness(void)
+{
+ /* Need 8-byte alignment for spill tracking */
+ __u32 __attribute__((aligned(8))) key = 1;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("valid multiple map access into an array using constant without nullness")
+__success __retval(8) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -16) = {{(0|r[0-9])}}")
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int multiple_array_with_a_constant_no_nullness(void)
+{
+ __u32 __attribute__((aligned(8))) key = 1;
+ __u32 __attribute__((aligned(8))) key2 = 0;
+ struct test_val *val, *val2;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ val2 = bpf_map_lookup_elem(&map_array, &key2);
+ val2->index = offsetof(struct test_val, foo);
+
+ return val->index + val2->index;
+}
+
+SEC("socket")
+__description("valid map access into an array using natural aligned 32-bit constant 0 without nullness")
+__success __retval(4)
+unsigned int an_array_with_a_32bit_constant_0_no_nullness(void)
+{
+ /* Unlike the above tests, 32-bit zeroing is precisely tracked even
+ * if writes are not aligned to BPF_REG_SIZE. This tests that our
+ * STACK_ZERO handling functions.
+ */
+ struct test_val *val;
+ __u32 key = 0;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("valid map access into a pcpu array using constant without nullness")
+__success __retval(4) __log_level(2)
+__msg("mark_precise: frame0: regs= stack=-8 before {{[0-9]}}: ({{[a-f0-9]+}}) *(u32 *)(r10 -8) = {{(1|r[0-9])}}")
+unsigned int a_pcpu_array_with_a_constant_no_nullness(void)
+{
+ __u32 __attribute__((aligned(8))) key = 1;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_array_pcpu, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant without nullness")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_no_nullness_out_of_bounds(void)
+{
+ /* Out of bounds */
+ __u32 __attribute__((aligned(8))) key = 3;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant smaller than key_size")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_too_small(void)
+{
+ __u32 __attribute__((aligned(8))) key;
+ struct test_val *val;
+
+ /* Mark entire key as STACK_MISC */
+ bpf_probe_read_user(&key, sizeof(key), NULL);
+
+ /* Spilling only the bottom byte results in a tnum const of 1.
+ * We want to check that the verifier rejects it, as the spill is < 4B.
+ */
+ *(__u8 *)&key = 1;
+ val = bpf_map_lookup_elem(&map_array, &key);
+
+ /* Should fail, as verifier cannot prove in-bound lookup */
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid map access into an array using constant larger than key_size")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int an_array_with_a_constant_too_big(void)
+{
+ struct test_val *val;
+ __u64 key = 1;
+
+ /* Even if the constant value is < max_entries, if the spill size is
+ * larger than the key size, the set bits may not be where we expect them
+ * to be on different endian architectures.
+ */
+ val = bpf_map_lookup_elem(&map_array, &key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("invalid elided lookup using const and non-const key")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int mixed_const_and_non_const_key_lookup(void)
+{
+ __u32 __attribute__((aligned(8))) key;
+ struct test_val *val;
+ __u32 rand;
+
+ rand = bpf_get_prandom_u32();
+ key = rand > 42 ? 1 : rand;
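+ /* key is not a single verifier-known constant, so the NULL check cannot be elided */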
+ val = bpf_map_lookup_elem(&map_array, &key);
+
+ return val->index;
+}
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=4096 size=4")
+__naked void key_lookup_at_invalid_fp(void)
+{
+ asm volatile (" \
+ r1 = %[map_array] ll; \
+ r2 = r10; \
+ r2 += 4096; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = *(u64*)(r0 + 0); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+volatile __u32 __attribute__((aligned(8))) global_key;
+
+SEC("socket")
+__description("invalid elided lookup using non-stack key")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+unsigned int non_stack_key_lookup(void)
+{
+ struct test_val *val;
+
+ global_key = 1;
+ val = bpf_map_lookup_elem(&map_array, (void *)&global_key);
+ val->index = offsetof(struct test_val, foo);
+
+ return val->index;
+}
+
+SEC("socket")
+__description("doesn't reject UINT64_MAX as s64 for irrelevant maps")
+__success __retval(42)
+unsigned int doesnt_reject_irrelevant_maps(void)
+{
+ __u64 key = 0xFFFFFFFFFFFFFFFF;
+ struct test_val *val;
+
+ val = bpf_map_lookup_elem(&map_hash_48b, &key);
+ if (val)
+ return val->index;
+
+ return 42;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
new file mode 100644
index 000000000000..7efa9521105e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_async_cb_context.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Timer tests */
+
+struct timer_elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer_elem);
+} timer_map SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ u32 data;
+ /* Timer callbacks are never sleepable, even when armed from a sleepable program */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_non_sleepable_prog(void *ctx)
+{
+ struct timer_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&timer_map, &key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->t, &timer_map, 0);
+ bpf_timer_set_callback(&val->t, timer_cb);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__failure __msg("helper call might sleep in a non-sleepable prog")
+int timer_sleepable_prog(void *ctx)
+{
+ struct timer_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&timer_map, &key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->t, &timer_map, 0);
+ bpf_timer_set_callback(&val->t, timer_cb);
+ return 0;
+}
+
+/* Workqueue tests */
+
+struct wq_elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct wq_elem);
+} wq_map SEC(".maps");
+
+static int wq_cb(void *map, int *key, void *value)
+{
+ u32 data;
+ /* Workqueue callbacks are always sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int wq_non_sleepable_prog(void *ctx)
+{
+ struct wq_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&wq_map, &key);
+ if (!val)
+ return 0;
+
+ if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+ return 0;
+ if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ return 0;
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int wq_sleepable_prog(void *ctx)
+{
+ struct wq_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&wq_map, &key);
+ if (!val)
+ return 0;
+
+ if (bpf_wq_init(&val->w, &wq_map, 0) != 0)
+ return 0;
+ if (bpf_wq_set_callback_impl(&val->w, wq_cb, 0, NULL) != 0)
+ return 0;
+ return 0;
+}
+
+/* Task work tests */
+
+struct task_work_elem {
+ struct bpf_task_work tw;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct task_work_elem);
+} task_work_map SEC(".maps");
+
+static int task_work_cb(struct bpf_map *map, void *key, void *value)
+{
+ u32 data;
+ /* Task work callbacks are always sleepable, even from non-sleepable programs */
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+__success
+int task_work_non_sleepable_prog(void *ctx)
+{
+ struct task_work_elem *val;
+ struct task_struct *task;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&task_work_map, &key);
+ if (!val)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ return 0;
+}
+
+SEC("lsm.s/file_open")
+__success
+int task_work_sleepable_prog(void *ctx)
+{
+ struct task_work_elem *val;
+ struct task_struct *task;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&task_work_map, &key);
+ if (!val)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ bpf_task_work_schedule_resume_impl(task, &val->tw, &task_work_map, task_work_cb, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
index 8d77cc5323d3..fb62e09f2114 100644
--- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -28,7 +28,7 @@ __naked void stack_out_of_bounds(void)
SEC("socket")
__description("uninitialized stack1")
__success __log_level(4) __msg("stack depth 8")
-__failure_unpriv __msg_unpriv("invalid indirect read from stack")
+__failure_unpriv __msg_unpriv("invalid read from stack")
__naked void uninitialized_stack1(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
index 7c881bca9af5..8bcddadfc4da 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
@@ -32,18 +32,18 @@ int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
SEC("iter/cgroup")
__description("uninitialized iter in ->next()")
-__failure __msg("expected an initialized iter_bits as arg #1")
+__failure __msg("expected an initialized iter_bits as arg #0")
int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
- struct bpf_iter_bits *it = NULL;
+ struct bpf_iter_bits it = {};
- bpf_iter_bits_next(it);
+ bpf_iter_bits_next(&it);
return 0;
}
SEC("iter/cgroup")
__description("uninitialized iter in ->destroy()")
-__failure __msg("expected an initialized iter_bits as arg #1")
+__failure __msg("expected an initialized iter_bits as arg #0")
int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
{
struct bpf_iter_bits it = {};
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index a0bb7fb40ea5..411a18437d7e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -2,6 +2,7 @@
/* Converted from tools/testing/selftests/bpf/verifier/bounds.c */
#include <linux/bpf.h>
+#include <../../../include/linux/filter.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -620,8 +621,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test1")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test1(void)
{
asm volatile (" \
@@ -643,8 +650,14 @@ l1_%=: exit; \
SEC("socket")
__description("bounds check mixed 32bit and 64bit arithmetic. test2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void _32bit_and_64bit_arithmetic_test2(void)
{
asm volatile (" \
@@ -691,9 +704,14 @@ l0_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 0, reg xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_1(void)
{
asm volatile (" \
@@ -719,9 +737,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 = 0, reg32 xor 1")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 != 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_1(void)
{
asm volatile (" \
@@ -747,9 +770,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg = 2, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 > 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_2_reg_xor_3(void)
{
asm volatile (" \
@@ -829,9 +857,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg > 0, reg xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg_0_reg_xor_3(void)
{
asm volatile (" \
@@ -858,9 +891,14 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for reg32 > 0, reg32 xor 3")
-__success __failure_unpriv
-__msg_unpriv("R0 min value is outside of the allowed memory range")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if w1 >= 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 min value is outside of the allowed memory range` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("r0 = 0")
+#endif
__naked void reg32_0_reg32_xor_3(void)
{
asm volatile (" \
@@ -888,7 +926,7 @@ l1_%=: r0 = 0; \
SEC("socket")
__description("bounds check for non const xor src dst")
__success __log_level(2)
-__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (af) r0 ^= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_xor_src_dst(void)
{
asm volatile (" \
@@ -909,7 +947,7 @@ __naked void non_const_xor_src_dst(void)
SEC("socket")
__description("bounds check for non const or src dst")
__success __log_level(2)
-__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__msg("5: (4f) r0 |= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
__naked void non_const_or_src_dst(void)
{
asm volatile (" \
@@ -930,7 +968,7 @@ __naked void non_const_or_src_dst(void)
SEC("socket")
__description("bounds check for non const mul regs")
__success __log_level(2)
-__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__msg("5: (2f) r0 *= r6 ; R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
__naked void non_const_mul_regs(void)
{
asm volatile (" \
@@ -1028,7 +1066,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
__success __retval(0)
-__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
+__flag(BPF_F_TEST_REG_INVARIANTS)
__naked void crossing_64_bit_signed_boundary_2(void)
{
asm volatile (" \
@@ -1200,4 +1238,629 @@ l0_%=: r0 = 0; \
: __clobber_all);
}
+SEC("tc")
+__description("multiply mixed sign bounds. test 1")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=0x1bc16d5cd4927ee1,smax=umax=0x1bc16d674ec80000,smax32=0x7ffffeff,umax32=0xfffffeff,var_off=(0x1bc16d4000000000; 0x3ffffffeff))")
+__naked void mult_mixed0_sign(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xf;"
+ "r6 -= 1000000000;"
+ "r7 &= 0xf;"
+ "r7 -= 2000000000;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply mixed sign bounds. test 2")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=smin32=-100,smax=smax32=200)")
+__naked void mult_mixed1_sign(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xf;"
+ "r6 -= 0xa;"
+ "r7 &= 0xf;"
+ "r7 -= 0x14;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply negative bounds")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar(smin=umin=smin32=umin32=0x3ff280b0,smax=umax=smax32=umax32=0x3fff0001,var_off=(0x3ff00000; 0xf81ff))")
+__naked void mult_sign_bounds(void)
+{
+ asm volatile (
+ "r8 = 0x7fff;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xa;"
+ "r6 -= r8;"
+ "r7 &= 0xf;"
+ "r7 -= r8;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiply bounds that don't cross signed boundary")
+__success __log_level(2)
+__msg("r8 *= r6 {{.*}}; R6=scalar(smin=smin32=0,smax=umax=smax32=umax32=11,var_off=(0x0; 0xb)) R8=scalar(smin=0,smax=umax=0x7b96bb0a94a3a7cd,var_off=(0x0; 0x7fffffffffffffff))")
+__naked void mult_no_sign_crossing(void)
+{
+ asm volatile (
+ "r6 = 0xb;"
+ "r8 = 0xb3c3f8c99262687 ll;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= r7;"
+ "r8 *= r6;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiplication overflow, result in unbounded reg. test 1")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
+__naked void mult_unsign_ovf(void)
+{
+ asm volatile (
+ "r8 = 0x7ffffffffff ll;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0x7fffffff;"
+ "r7 &= r8;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("multiplication overflow, result in unbounded reg. test 2")
+__success __log_level(2)
+__msg("r6 *= r7 {{.*}}; R6=scalar()")
+__naked void mult_sign_ovf(void)
+{
+ asm volatile (
+ "r8 = 0x7ffffffff ll;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+ "r6 &= 0xa;"
+ "r6 -= r8;"
+ "r7 &= 0x7fffffff;"
+ "r6 *= r7;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit addition, all outcomes overflow")
+__success __log_level(2)
+__msg("5: (0f) r3 += r3 {{.*}} R3=scalar(umin=0x4000000000000000,umax=0xfffffffffffffffe)")
+__retval(0)
+__naked void add64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 0xa000000000000000 ll;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit addition, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("4: (0f) r3 += r3 {{.*}} R3=scalar()")
+__retval(0)
+__naked void add64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r4 = r0;"
+ "r3 = 2;"
+ "r3 |= r4;"
+ "r3 += r3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition overflow, all outcomes overflow")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=umin=umin32=0x40000000,smax=umax=umax32=0xfffffffe,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 0xa0000000;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit addition, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("4: (0c) w3 += w3 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void add32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w4 = w0;"
+ "w3 = 2;"
+ "w3 |= w4;"
+ "w3 += w3;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, all outcomes underflow")
+__success __log_level(2)
+__msg("6: (1f) r3 -= r1 {{.*}} R3=scalar(umin=1,umax=0x8000000000000000)")
+__retval(0)
+__naked void sub64_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 0x8000000000000000 ll;"
+ "r1 |= r2;"
+ "r3 = 0;"
+ "r3 -= r1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("64-bit subtraction, partial overflow, result in unbounded reg")
+__success __log_level(2)
+__msg("3: (1f) r3 -= r2 {{.*}} R3=scalar()")
+__retval(0)
+__naked void sub64_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r3 = r0;"
+ "r2 = 1;"
+ "r3 -= r2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction overflow, all outcomes underflow")
+__success __log_level(2)
+__msg("5: (1c) w3 -= w1 {{.*}} R3=scalar(smin=umin=umin32=1,smax=umax=umax32=0x80000000,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_full_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w1 = w0;"
+ "w2 = 0x80000000;"
+ "w1 |= w2;"
+ "w3 = 0;"
+ "w3 -= w1;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("32-bit subtraction, partial overflow, result in unbounded u32 bounds")
+__success __log_level(2)
+__msg("3: (1c) w3 -= w2 {{.*}} R3=scalar(smin=0,smax=umax=0xffffffff,var_off=(0x0; 0xffffffff))")
+__retval(0)
+__naked void sub32_partial_overflow(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "w3 = w0;"
+ "w2 = 1;"
+ "w3 -= w2;"
+ "r0 = 0;"
+ "exit"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead branch on jset, does not result in invariants violation error")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_range_analysis(void)
+{
+ asm volatile (" \
+ call %[bpf_get_netns_cookie]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 & 0xffffffff goto +0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges
+ * overlap on the negative side. At instruction 7, the ranges look as follows:
+ *
+ * 0 umin=0xfffffcf1 umax=0xff..ff6e U64_MAX
+ * | [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxxxx] [xxxxxxxxxxxx|
+ * 0 smax=0xeffffeee smin=-655 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0xff..ffd71;0xff..ff6e] U64_MAX
+ * | [xxx] |
+ * |----------------------------|------------------------------|
+ * | [xxx] |
+ * 0 s64=[-655;-146] -1
+ *
+ * Without cross-sign-boundary deduction, we end up with an invariant
+ * violation error. A small C sketch of this deduction follows the test
+ * below.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, negative overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("7: (1f) r0 -= r6 {{.*}} R0=scalar(smin=smin32=-655,smax=smax32=-146,umin=0xfffffffffffffd71,umax=0xffffffffffffff6e,umin32=0xfffffd71,umax32=0xffffff6e,var_off=(0xfffffffffffffc00; 0x3ff))")
+__retval(0)
+__naked void bounds_deduct_negative_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w3 = w0; \
+ w6 = (s8)w0; \
+ r0 = (s8)r0; \
+ if w6 >= 0xf0000000 goto l0_%=; \
+ r0 += r6; \
+ r6 += 400; \
+ r0 -= r6; \
+ if r3 < r0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
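+
+/* The negative-overlap deduction above can be written down directly: when
+ * the s64 range wraps (smin < 0 <= smax) and the u64 range misses the
+ * non-negative segment entirely, every feasible value lives in the negative
+ * segment and both ranges tighten to the intersection. The helper below is
+ * only a sketch of that single case (assuming the intersection is
+ * non-empty), not how verifier.c implements it. For the test above,
+ * umin = 0xfffffcf1 is above (u64)smax, so umin is raised to
+ * (u64)-655 = 0xfffffffffffffd71 and the s64 range becomes [-655; -146],
+ * matching the expected verifier log.
+ */
+struct s64_u64_bounds {
+ long long smin, smax;
+ unsigned long long umin, umax;
+};
+
+static __attribute__((unused)) void
+deduce_negative_overlap_sketch(struct s64_u64_bounds *b)
+{
+ if (b->smin < 0 && b->smax >= 0 &&
+     b->umin > (unsigned long long)b->smax) {
+  /* value lies in [(u64)smin; U64_MAX] intersected with [umin; umax] */
+  if (b->umin < (unsigned long long)b->smin)
+   b->umin = (unsigned long long)b->smin;
+  b->smin = (long long)b->umin;
+  b->smax = (long long)b->umax;
+ }
+}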
+
+/* This test covers the bounds deduction on 64bits when the s64 and u64 ranges
+ * overlap on the positive side. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff00 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * We should therefore deduce the following new bounds:
+ *
+ * 0 u64=[0;127] U64_MAX
+ * [xxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * [xxxxxxxx] |
+ * 0 s64=[0;127] -1
+ *
+ * Without cross-sign-boundary deduction, the program is rejected due to
+ * the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, positive overlap")
+__success __log_level(2) __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=0,smax=umax=smax32=umax32=127,var_off=(0x0; 0x7f))")
+__retval(0)
+__naked void bounds_deduct_positive_overlap(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff00; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
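+
+/* The mirror of the sketch above covers this test: here
+ * umax = 0xffffffffffffff00 = (u64)-256 is below (u64)smin = (u64)-128, so
+ * the negative segment of the s64 range cannot be reached and both ranges
+ * collapse to the non-negative intersection [0; 127].
+ */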
+
+/* This test is the same as above, but the s64 and u64 ranges overlap in two
+ * places. At instruction 3, the ranges look as follows:
+ *
+ * 0 umin=0 umax=0xffffffffffffff80 U64_MAX
+ * [xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx] |
+ * |----------------------------|------------------------------|
+ * |xxxxxxxx] [xxxxxxxx|
+ * 0 smax=127 smin=-128 -1
+ *
+ * 0xffffffffffffff80 = (u64)-128. We therefore can't deduce anything new and
+ * the program should fail due to the frame pointer write.
+ */
+SEC("socket")
+__description("bounds deduction cross sign boundary, two overlaps")
+__failure __flag(BPF_F_TEST_REG_INVARIANTS)
+__msg("3: (2d) if r0 > r1 {{.*}} R0=scalar(smin=smin32=-128,smax=smax32=127,umax=0xffffffffffffff80)")
+__msg("frame pointer is read only")
+__naked void bounds_deduct_two_overlaps(void)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ r0 = (s8)r0; \
+ r1 = 0xffffffffffffff80; \
+ if r0 > r1 goto l0_%=; \
+ if r0 < 128 goto l0_%=; \
+ r10 = 0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jne branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jne_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 != r0 goto +1; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead jeq branch due to disagreeing tnums")
+__success __log_level(2)
+__naked void jeq_disagreeing_tnums(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w0 = w0; \
+ r0 >>= 30; \
+ r0 <<= 30; \
+ r1 = r0; \
+ r1 += 1024; \
+ if r1 == r0 goto +1; \
+ exit; \
+ r10 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("conditional jump on same register, branch taken")
+__not_msg("20: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void condition_jump_on_same_register(void *ctx)
+{
+ asm volatile(" \
+ call %[bpf_get_prandom_u32]; \
+ w8 = 0x80000000; \
+ r0 &= r8; \
+ if r0 == r0 goto +1; \
+ goto l1_%=; \
+ if r0 >= r0 goto +1; \
+ goto l1_%=; \
+ if r0 s>= r0 goto +1; \
+ goto l1_%=; \
+ if r0 <= r0 goto +1; \
+ goto l1_%=; \
+ if r0 s<= r0 goto +1; \
+ goto l1_%=; \
+ if r0 != r0 goto l1_%=; \
+ if r0 > r0 goto l1_%=; \
+ if r0 s> r0 goto l1_%=; \
+ if r0 < r0 goto l1_%=; \
+ if r0 s< r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, constant value branch taken")
+__not_msg("7: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_1(void *ctx)
+{
+ asm volatile(" \
+ r0 = 0; \
+ if r0 & r0 goto l1_%=; \
+ r0 = 1; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value branch taken")
+__not_msg("12: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__retval(0) __flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_2(void *ctx)
+{
+ asm volatile(" \
+ /* range [1;2] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 += 1; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+ /* range [-2;-1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 -= 2; \
+ if r0 & r0 goto +1; \
+ goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 1")
+__msg("3: (b7) r0 = 0 {{.*}} R0=0")
+__msg("5: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_3(void *ctx)
+{
+ asm volatile(" \
+ /* range [0;1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 2")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_4(void *ctx)
+{
+ asm volatile(" \
+ /* range [-1;0] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x1; \
+ r0 -= 1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("jset on same register, scalar value unknown branch 3")
+__msg("4: (b7) r0 = 0 {{.*}} R0=0")
+__msg("6: (b7) r0 = 1 {{.*}} R0=1")
+__success __log_level(2)
+__flag(BPF_F_TEST_REG_INVARIANTS)
+__naked void jset_on_same_register_5(void *ctx)
+{
+ asm volatile(" \
+ /* range [-1;1] */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x2; \
+ r0 -= 1; \
+ if r0 & r0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
index c506afbdd936..260a6df264e3 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -159,13 +159,16 @@ __failure_unpriv
__naked void deducing_bounds_from_const_10(void)
{
asm volatile (" \
+ r6 = r1; \
r0 = 0; \
if r0 s<= 0 goto l0_%=; \
-l0_%=: /* Marks reg as unknown. */ \
- r0 = -r0; \
- r0 -= r1; \
+l0_%=: /* Marks r0 as unknown. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 -= r6; \
exit; \
-" ::: __clobber_all);
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
index 5094c288cfd7..fb4fa465d67c 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_fastcall.c
@@ -12,7 +12,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 8")
__xlated("4: r5 = 5")
-__xlated("5: w0 = ")
+__xlated("5: r0 = ")
__xlated("6: r0 = &(void __percpu *)(r0)")
__xlated("7: r0 = *(u32 *)(r0 +0)")
__xlated("8: exit")
@@ -620,23 +620,66 @@ __naked void helper_call_does_not_prevent_bpf_fastcall(void)
SEC("raw_tp")
__arch_x86_64
-__log_level(4) __msg("stack depth 16")
-/* may_goto counter at -16 */
-__xlated("0: *(u64 *)(r10 -16) =")
-__xlated("1: r1 = 1")
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
__xlated("...")
-__xlated("3: r0 = &(void __percpu *)(r0)")
+__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("...")
/* may_goto expansion starts */
-__xlated("5: r11 = *(u64 *)(r10 -16)")
-__xlated("6: if r11 == 0x0 goto pc+3")
-__xlated("7: r11 -= 1")
-__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("6: r11 = *(u64 *)(r10 -24)")
+__xlated("7: if r11 == 0x0 goto pc+6")
+__xlated("8: r11 -= 1")
+__xlated("9: if r11 != 0x0 goto pc+2")
+__xlated("10: r11 = -24")
+__xlated("11: call unknown")
+__xlated("12: *(u64 *)(r10 -24) = r11")
/* may_goto expansion ends */
-__xlated("9: *(u64 *)(r10 -8) = r1")
-__xlated("10: exit")
+__xlated("13: *(u64 *)(r10 -8) = r1")
+__xlated("14: exit")
+__success
+__naked void may_goto_interaction_x86_64(void)
+{
+ asm volatile (
+ "r1 = 1;"
+ "*(u64 *)(r10 - 16) = r1;"
+ "call %[bpf_get_smp_processor_id];"
+ "r1 = *(u64 *)(r10 - 16);"
+ ".8byte %[may_goto];"
+ /* just touch some stack at -8 */
+ "*(u64 *)(r10 - 8) = r1;"
+ "exit;"
+ :
+ : __imm(bpf_get_smp_processor_id),
+ __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, +1 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__arch_arm64
+__log_level(4) __msg("stack depth 24")
+/* may_goto counter at -24 */
+__xlated("0: *(u64 *)(r10 -24) =")
+/* may_goto timestamp at -16 */
+__xlated("1: *(u64 *)(r10 -16) =")
+__xlated("2: r1 = 1")
+__xlated("3: call bpf_get_smp_processor_id")
+/* may_goto expansion starts */
+__xlated("4: r11 = *(u64 *)(r10 -24)")
+__xlated("5: if r11 == 0x0 goto pc+6")
+__xlated("6: r11 -= 1")
+__xlated("7: if r11 != 0x0 goto pc+2")
+__xlated("8: r11 = -24")
+__xlated("9: call unknown")
+__xlated("10: *(u64 *)(r10 -24) = r11")
+/* may_goto expansion ends */
+__xlated("11: *(u64 *)(r10 -8) = r1")
+__xlated("12: exit")
__success
-__naked void may_goto_interaction(void)
+__naked void may_goto_interaction_arm64(void)
{
asm volatile (
"r1 = 1;"
@@ -666,7 +709,7 @@ SEC("raw_tp")
__arch_x86_64
__log_level(4) __msg("stack depth 32+0")
__xlated("2: r1 = 1")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* bpf_loop params setup */
@@ -715,7 +758,7 @@ __arch_x86_64
__log_level(4) __msg("stack depth 40+0")
/* call bpf_get_smp_processor_id */
__xlated("2: r1 = 42")
-__xlated("3: w0 =")
+__xlated("3: r0 =")
__xlated("4: r0 = &(void __percpu *)(r0)")
__xlated("5: r0 = *(u32 *)(r0 +0)")
/* call bpf_get_prandom_u32 */
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
new file mode 100644
index 000000000000..35e2cdc00a01
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_trap.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if __clang_major__ >= 21 && 0
+SEC("socket")
+__description("__builtin_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_builtin_trap_with_simple_c(void)
+{
+ __builtin_trap();
+}
+#endif
+
+SEC("socket")
+__description("__bpf_trap with simple c code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+void bpf_trap_with_simple_c(void)
+{
+ __bpf_trap();
+}
+
+SEC("socket")
+__description("__bpf_trap as the second-from-last insn")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void bpf_trap_at_func_end(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "call %[__bpf_trap];"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("dead code __bpf_trap in the middle of code")
+__success
+__naked void dead_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 0 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reachable __bpf_trap in the middle of code")
+__failure __msg("unexpected __bpf_trap() due to uninitialized variable?")
+__naked void live_bpf_trap_in_middle(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 == 1 goto +1;"
+ "call %[__bpf_trap];"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm(__bpf_trap)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
index a570e48b917a..03942cec07e5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -11,7 +11,7 @@ __success __retval(0)
__naked void btf_ctx_access_accept(void)
{
asm volatile (" \
- r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\
+ r2 = *(u64 *)(r1 + 8); /* load 2nd argument value (int pointer) */\
r0 = 0; \
exit; \
" ::: __clobber_all);
@@ -23,7 +23,55 @@ __success __retval(0)
__naked void ctx_access_u32_pointer_accept(void)
{
asm volatile (" \
- r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (u32 pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u32")
+__failure __msg("size 4 must be 8")
+__naked void ctx_access_u32_pointer_reject_32(void)
+{
+ asm volatile (" \
+ r2 = *(u32 *)(r1 + 0); /* load 1st argument with narrow load */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u16")
+__failure __msg("size 2 must be 8")
+__naked void ctx_access_u32_pointer_reject_16(void)
+{
+ asm volatile (" \
+ r2 = *(u16 *)(r1 + 0); /* load 1st argument with narrow load */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer reject u8")
+__failure __msg("size 1 must be 8")
+__naked void ctx_access_u32_pointer_reject_8(void)
+{
+ asm volatile (" \
+ r2 = *(u8 *)(r1 + 0); /* load 1st argument with narrow load */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test10")
+__description("btf_ctx_access const void pointer accept")
+__success __retval(0)
+__naked void ctx_access_const_void_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u64 *)(r1 + 0); /* load 1st argument value (const void pointer) */\
r0 = 0; \
exit; \
" ::: __clobber_all);
diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c
index ba8922b2eebd..68c568c3c3a0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_const_or.c
+++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c
@@ -25,7 +25,7 @@ __naked void constant_should_keep_constant_type(void)
SEC("tracepoint")
__description("constant register |= constant should not bypass stack boundary checks")
-__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__failure __msg("invalid write to stack R1 off=-48 size=58")
__naked void not_bypass_stack_boundary_checks_1(void)
{
asm volatile (" \
@@ -62,7 +62,7 @@ __naked void register_should_keep_constant_type(void)
SEC("tracepoint")
__description("constant register |= constant register should not bypass stack boundary checks")
-__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__failure __msg("invalid write to stack R1 off=-48 size=58")
__naked void not_bypass_stack_boundary_checks_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
index a83809a1dbbf..5ebf7d9bcc55 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/ctx.c */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -218,4 +218,78 @@ __naked void null_check_8_null_bind(void)
: __clobber_all);
}
+#define narrow_load(type, ctx, field) \
+ SEC(type) \
+ __description("narrow load on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void invalid_narrow_load##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u32 *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(off, offsetof(struct ctx, field) + 4) \
+ : __clobber_all); \
+ }
+
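+/* Every field exercised below is 8 bytes wide (a pointer or a u64), so a
+ * 4-byte load at offsetof(field) + 4 touches only the upper half of the
+ * field and must be rejected as an invalid bpf_context access.
+ */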
+narrow_load("cgroup/getsockopt", bpf_sockopt, sk);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval);
+narrow_load("cgroup/getsockopt", bpf_sockopt, optval_end);
+narrow_load("tc", __sk_buff, sk);
+narrow_load("cgroup/bind4", bpf_sock_addr, sk);
+narrow_load("sockops", bpf_sock_ops, sk);
+narrow_load("sockops", bpf_sock_ops, skb_data);
+narrow_load("sockops", bpf_sock_ops, skb_data_end);
+narrow_load("sockops", bpf_sock_ops, skb_hwtstamp);
+
+#define unaligned_access(type, ctx, field) \
+ SEC(type) \
+ __description("unaligned access on field " #field " of " #ctx) \
+ __failure __msg("invalid bpf_context access") \
+ __naked void unaligned_ctx_access_##ctx##field(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sizeof_field(struct ctx, field) * 8), \
+ __imm_const(off, offsetof(struct ctx, field) + 1) \
+ : __clobber_all); \
+ }
+
+unaligned_access("flow_dissector", __sk_buff, data);
+unaligned_access("netfilter", bpf_nf_ctx, skb);
+
+#define padding_access(type, ctx, prev_field, sz) \
+ SEC(type) \
+ __description("access on " #ctx " padding after " #prev_field) \
+ __naked void padding_ctx_access_##ctx(void) \
+ { \
+ asm volatile (" \
+ r1 = *(u%[size] *)(r1 + %[off]); \
+ r0 = 0; \
+ exit;" \
+ : \
+ : __imm_const(size, sz * 8), \
+ __imm_const(off, offsetofend(struct ctx, prev_field)) \
+ : __clobber_all); \
+ }
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/bind4", bpf_sock_addr, msg_src_ip6[3], 4);
+
+__success
+padding_access("sk_lookup", bpf_sk_lookup, remote_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("tc", __sk_buff, tstamp_type, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("cgroup/post_bind4", bpf_sock, dst_port, 2);
+
+__failure __msg("invalid bpf_context access")
+padding_access("sk_reuseport", sk_reuseport_md, hash, 4);
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c
index ec79cbcfde91..87e51a215558 100644
--- a/tools/testing/selftests/bpf/progs/verifier_d_path.c
+++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c
@@ -11,7 +11,7 @@ __success __retval(0)
__naked void d_path_accept(void)
{
asm volatile (" \
- r1 = *(u32*)(r1 + 0); \
+ r1 = *(u64 *)(r1 + 0); \
r2 = r10; \
r2 += -8; \
r6 = 0; \
@@ -31,7 +31,7 @@ __failure __msg("helper call is not allowed in probe")
__naked void d_path_reject(void)
{
asm volatile (" \
- r1 = *(u32*)(r1 + 0); \
+ r1 = *(u64 *)(r1 + 0); \
r2 = r10; \
r2 += -8; \
r6 = 0; \
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
index 28b602ac9cbe..911caa8fd1b7 100644
--- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
+#include <linux/if_ether.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
@@ -800,4 +801,62 @@ l0_%=: /* exit(0) */ \
: __clobber_all);
}
+#define access_test_non_linear(name, type, desc, retval, linear_sz, off) \
+ SEC(type) \
+ __description("direct packet access: " #name " (non-linear, " type ", " desc ")") \
+ __success __retval(retval) \
+ __linear_size(linear_sz) \
+ __naked void access_non_linear_##name(void) \
+ { \
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[skb_data]); \
+ r3 = *(u32*)(r1 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += %[offset]; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ r0 = 0; \
+ exit; \
+ l0_%=: r0 = 1; \
+ exit; \
+ " : \
+ : __imm_const(skb_data, offsetof(struct __sk_buff, data)), \
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end)), \
+ __imm_const(offset, off) \
+ : __clobber_all); \
+ }
+
+access_test_non_linear(test31, "tc", "too short eth", 1, ETH_HLEN, 22);
+access_test_non_linear(test32, "tc", "too short 1", 1, 1, 22);
+access_test_non_linear(test33, "tc", "long enough", 0, 22, 22);
+access_test_non_linear(test34, "cgroup_skb/ingress", "too short eth", 1, ETH_HLEN, 8);
+access_test_non_linear(test35, "cgroup_skb/ingress", "too short 1", 1, 1, 8);
+access_test_non_linear(test36, "cgroup_skb/ingress", "long enough", 0, 22, 8);
+
+SEC("tc")
+__description("direct packet access: test37 (non-linear, linearized)")
+__success __retval(0)
+__linear_size(ETH_HLEN)
+__naked void access_non_linear_linearized(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 22; \
+ call %[bpf_skb_pull_data]; \
+ r2 = *(u32*)(r6 + %[skb_data]); \
+ r3 = *(u32*)(r6 + %[skb_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 - 1); \
+ exit; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_skb_pull_data),
+ __imm_const(skb_data, offsetof(struct __sk_buff, data)),
+ __imm_const(skb_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
index 458984da804c..34e0c012ee76 100644
--- a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -77,7 +77,7 @@ l0_%=: exit; \
SEC("tc")
__description("MOD32 overflow, check 1")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_1(void)
{
asm volatile (" \
@@ -92,7 +92,7 @@ __naked void mod32_overflow_check_1(void)
SEC("tc")
__description("MOD32 overflow, check 2")
-__success __retval(INT_MIN)
+__success __retval(_INT_MIN)
__naked void mod32_overflow_check_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
index 4ab0ef18d7eb..1204fbc58178 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -179,4 +179,132 @@ int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
return subprog_trusted_acq_rel(task);
}
+__weak int subprog_untrusted_bad_tags(struct task_struct *task __arg_untrusted __arg_nullable)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 untrusted cannot be combined with any other tags")
+int untrusted_bad_tags(void *ctx)
+{
+ return subprog_untrusted_bad_tags(0);
+}
+
+struct local_type_wont_be_accepted {};
+
+__weak int subprog_untrusted_bad_type(struct local_type_wont_be_accepted *p __arg_untrusted)
+{
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("arg#0 reference type('STRUCT local_type_wont_be_accepted') has no matches")
+int untrusted_bad_type(void *ctx)
+{
+ return subprog_untrusted_bad_type(bpf_rdonly_cast(0, 0));
+}
+
+__weak int subprog_untrusted(const volatile struct task_struct *restrict task __arg_untrusted)
+{
+ return task->pid;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_untrusted') is global and assumed valid.")
+__msg("Validating subprog_untrusted() func#1...")
+__msg(": R1=untrusted_ptr_task_struct")
+int trusted_to_untrusted(void *ctx)
+{
+ return subprog_untrusted(bpf_get_current_task_btf());
+}
+
+char mem[16];
+u32 offset;
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted(void *ctx)
+{
+ /* untrusted to untrusted */
+ subprog_untrusted(bpf_core_cast(0, struct task_struct));
+ /* wrong type to untrusted */
+ subprog_untrusted((void *)bpf_core_cast(0, struct bpf_verifier_env));
+ /* map value to untrusted */
+ subprog_untrusted((void *)mem);
+ /* scalar to untrusted */
+ subprog_untrusted(0);
+ /* variable offset to untrusted (map) */
+ subprog_untrusted((void *)mem + offset);
+ /* variable offset to untrusted (trusted) */
+ subprog_untrusted((void *)bpf_get_current_task_btf() + offset);
+ return 0;
+}
+
+__weak int subprog_untrusted2(struct task_struct *task __arg_untrusted)
+{
+ return subprog_trusted_task_nullable(task);
+}
+
+SEC("tp_btf/sys_enter")
+__failure
+__msg("R1 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#{{.*}} ('subprog_trusted_task_nullable')")
+int untrusted_to_trusted(void *ctx)
+{
+ return subprog_untrusted2(bpf_get_current_task_btf());
+}
+
+__weak int subprog_void_untrusted(void *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_char_untrusted(char *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+__weak int subprog_enum_untrusted(enum bpf_attach_type *p __arg_untrusted)
+{
+ return *(int *)p;
+}
+
+SEC("tp_btf/sys_enter")
+__success
+__log_level(2)
+__msg("r1 = {{.*}}; {{.*}}R1=trusted_ptr_task_struct()")
+__msg("Func#1 ('subprog_void_untrusted') is global and assumed valid.")
+__msg("Validating subprog_void_untrusted() func#1...")
+__msg(": R1=rdonly_untrusted_mem(sz=0)")
+int trusted_to_untrusted_mem(void *ctx)
+{
+ return subprog_void_untrusted(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/sys_enter")
+__success
+int anything_to_untrusted_mem(void *ctx)
+{
+ /* untrusted to untrusted mem */
+ subprog_void_untrusted(bpf_core_cast(0, struct task_struct));
+ /* map value to untrusted mem */
+ subprog_void_untrusted(mem);
+ /* scalar to untrusted mem */
+ subprog_void_untrusted(0);
+ /* variable offset to untrusted mem (map) */
+ subprog_void_untrusted((void *)mem + offset);
+ /* variable offset to untrusted mem (trusted) */
+ subprog_void_untrusted(bpf_get_current_task_btf() + offset);
+ /* variable offset to untrusted char/enum (map) */
+ subprog_char_untrusted(mem + offset);
+ subprog_enum_untrusted((void *)mem + offset);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
index 05a329ee45ee..d5d8f24df394 100644
--- a/tools/testing/selftests/bpf/progs/verifier_gotol.c
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -4,11 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__description("gotol, small_imm")
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotox.c b/tools/testing/selftests/bpf/progs/verifier_gotox.c
new file mode 100644
index 000000000000..607dad058ca1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_gotox.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Isovalent */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../../../include/linux/filter.h"
+
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
+
+#define DEFINE_SIMPLE_JUMP_TABLE_PROG(NAME, SRC_REG, OFF, IMM, OUTCOME) \
+ \
+ SEC("socket") \
+ OUTCOME \
+ __naked void jump_table_ ## NAME(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, (SRC_REG), (OFF) , (IMM))) \
+ : __clobber_all); \
+ }
+
+/*
+ * The first program, which doesn't use any reserved fields,
+ * loads and works properly. The rest fail to load.
+ */
+DEFINE_SIMPLE_JUMP_TABLE_PROG(ok, BPF_REG_0, 0, 0, __success __retval(1))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_src_reg, BPF_REG_1, 0, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_off, BPF_REG_0, 1, 0, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+DEFINE_SIMPLE_JUMP_TABLE_PROG(reserved_field_non_zero_imm, BPF_REG_0, 0, 1, __failure __msg("BPF_JA|BPF_X uses reserved fields"))
+
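+/* The asm above builds by hand what a compiler would emit for an indirect
+ * jump: a .jumptables entry per target, each holding the byte offset of the
+ * target instruction from the start of the "socket" section, an ll load of
+ * the table address, a load of one entry, and a gotox on the loaded value.
+ * It is roughly the shape a C switch statement could be lowered to
+ * (illustrative only, the tests spell it out in asm on purpose):
+ *
+ *	switch (idx) {
+ *	case 0: return 0;
+ *	default: return 1;
+ *	}
+ */
+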
+/*
+ * Gotox is forbidden when there is no jump table loaded
+ * that points into the sub-function where the gotox is used.
+ */
+SEC("socket")
+__failure __msg("no jump tables found for subprog starting at 0")
+__naked void jump_table_no_jump_table(void)
+{
+ asm volatile (" \
+ .8byte %[gotox_r0]; \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+/*
+ * Incorrect type of the target register, only PTR_TO_INSN allowed
+ */
+SEC("socket")
+__failure __msg("R1 has type scalar, expected PTR_TO_INSN")
+__naked void jump_table_incorrect_dst_reg_type(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ r1 = 42; \
+ .8byte %[gotox_r1]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r1, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_1, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#define DEFINE_INVALID_SIZE_PROG(READ_SIZE, OUTCOME) \
+ \
+ SEC("socket") \
+ OUTCOME \
+ __naked void jump_table_invalid_read_size_ ## READ_SIZE(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(" #READ_SIZE " *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0)) \
+ : __clobber_all); \
+ }
+
+DEFINE_INVALID_SIZE_PROG(u32, __failure __msg("Invalid read of 4 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u16, __failure __msg("Invalid read of 2 bytes from insn_array"))
+DEFINE_INVALID_SIZE_PROG(u8, __failure __msg("Invalid read of 1 bytes from insn_array"))
+
+SEC("socket")
+__failure __msg("misaligned value access off 0+1+0 size 8")
+__naked void jump_table_misaligned_access(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 1; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=24 size=8")
+__naked void jump_table_invalid_mem_access_pos(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 24; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("invalid access to map value, value_size=16 off=-24 size=8")
+__naked void jump_table_invalid_mem_access_neg(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 -= 24; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_add_sub_ok(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 -= 24; \
+ r0 += 32; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("write into map forbidden, value_size=16 off=8 size=8")
+__naked void jump_table_no_writes(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r1 = 0xbeef; \
+ *(u64 *)(r0 + 0) = r1; \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#define DEFINE_JUMP_TABLE_USE_REG(REG) \
+ SEC("socket") \
+ __success __retval(1) \
+ __naked void jump_table_use_reg_r ## REG(void) \
+ { \
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+ jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 16; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r" #REG " = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_rX]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+ " : \
+ : __imm_insn(gotox_rX, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_ ## REG, 0, 0 , 0)) \
+ : __clobber_all); \
+ }
+
+DEFINE_JUMP_TABLE_USE_REG(0)
+DEFINE_JUMP_TABLE_USE_REG(1)
+DEFINE_JUMP_TABLE_USE_REG(2)
+DEFINE_JUMP_TABLE_USE_REG(3)
+DEFINE_JUMP_TABLE_USE_REG(4)
+DEFINE_JUMP_TABLE_USE_REG(5)
+DEFINE_JUMP_TABLE_USE_REG(6)
+DEFINE_JUMP_TABLE_USE_REG(7)
+DEFINE_JUMP_TABLE_USE_REG(8)
+DEFINE_JUMP_TABLE_USE_REG(9)
+
+__used static int test_subprog(void)
+{
+ return 0;
+}
+
+SEC("socket")
+__failure __msg("jump table for insn 4 points outside of the subprog [0,10]")
+__naked void jump_table_outside_subprog(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret_out_%= - socket; \
+ .size jt0_%=, 24; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ call test_subprog; \
+ exit; \
+ ret_out_%=: \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__success __retval(1)
+__naked void jump_table_contains_non_unique_values(void)
+{
+ asm volatile (" \
+ .pushsection .jumptables,\"\",@progbits; \
+jt0_%=: \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .quad ret0_%= - socket; \
+ .quad ret1_%= - socket; \
+ .size jt0_%=, 80; \
+ .global jt0_%=; \
+ .popsection; \
+ \
+ r0 = jt0_%= ll; \
+ r0 += 8; \
+ r0 = *(u64 *)(r0 + 0); \
+ .8byte %[gotox_r0]; \
+ ret0_%=: \
+ r0 = 0; \
+ exit; \
+ ret1_%=: \
+ r0 = 1; \
+ exit; \
+" : \
+ : __imm_insn(gotox_r0, BPF_RAW_INSN(BPF_JMP | BPF_JA | BPF_X, BPF_REG_0, 0, 0 , 0))
+ : __clobber_all);
+}
+
+#endif /* __TARGET_ARCH_x86 || __TARGET_ARCH_arm64 */
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
index 50c6b22606f6..f2c54e4d89eb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
@@ -67,7 +67,7 @@ SEC("socket")
__description("helper access to variable memory: stack, bitwise AND, zero included")
/* in privileged mode reads from uninitialized stack locations are permitted */
__success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__msg_unpriv("invalid read from stack R2 off -64+0 size 64")
__retval(0)
__naked void stack_bitwise_and_zero_included(void)
{
@@ -100,7 +100,7 @@ __naked void stack_bitwise_and_zero_included(void)
SEC("tracepoint")
__description("helper access to variable memory: stack, bitwise AND + JMP, wrong max")
-__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
__naked void bitwise_and_jmp_wrong_max(void)
{
asm volatile (" \
@@ -187,7 +187,7 @@ l0_%=: r0 = 0; \
SEC("tracepoint")
__description("helper access to variable memory: stack, JMP, bounds + offset")
-__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
__naked void memory_stack_jmp_bounds_offset(void)
{
asm volatile (" \
@@ -211,7 +211,7 @@ l0_%=: r0 = 0; \
SEC("tracepoint")
__description("helper access to variable memory: stack, JMP, wrong max")
-__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__failure __msg("invalid write to stack R1 off=-64 size=65")
__naked void memory_stack_jmp_wrong_max(void)
{
asm volatile (" \
@@ -260,7 +260,7 @@ SEC("socket")
__description("helper access to variable memory: stack, JMP, no min check")
/* in privileged mode reads from uninitialized stack locations are permitted */
__success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__msg_unpriv("invalid read from stack R2 off -64+0 size 64")
__retval(0)
__naked void stack_jmp_no_min_check(void)
{
@@ -750,7 +750,7 @@ SEC("socket")
__description("helper access to variable memory: 8 bytes leak")
/* in privileged mode reads from uninitialized stack locations are permitted */
__success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -64+32 size 64")
+__msg_unpriv("invalid read from stack R2 off -64+32 size 64")
__retval(0)
__naked void variable_memory_8_bytes_leak(void)
{
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
index 5f2efb895edb..59e34d558654 100644
--- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -96,7 +96,7 @@ __naked void arg_ptr_to_long_misaligned(void)
SEC("cgroup/sysctl")
__description("arg pointer to long size < sizeof(long)")
-__failure __msg("invalid indirect access to stack R4 off=-4 size=8")
+__failure __msg("invalid write to stack R4 off=-4 size=8")
__naked void to_long_size_sizeof_long(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index e54bb5385bc1..75dd922e4e9f 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -407,11 +407,7 @@ l0_%=: call %[bpf_jiffies64]; \
: __clobber_all);
}
-#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
- (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
- defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
- defined(__TARGET_ARCH_loongarch)) && \
- __clang_major__ >= 18
+#ifdef CAN_USE_GOTOL
SEC("socket")
__success __retval(0)
__naked void gotol_and_may_goto(void)
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
index 52edee41caf6..c8494b682c31 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ldsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#include "bpf_arena_common.h"
#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
(defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
@@ -10,6 +11,12 @@
defined(__TARGET_ARCH_loongarch)) && \
__clang_major__ >= 18
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 1);
+} arena SEC(".maps");
+
SEC("socket")
__description("LDSX, S8")
__success __success_unpriv __retval(-2)
@@ -65,7 +72,7 @@ __naked void ldsx_s32(void)
SEC("socket")
__description("LDSX, S8 range checking, privileged")
__log_level(2) __success __retval(1)
-__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
+__msg("R1=scalar(smin=smin32=-128,smax=smax32=127)")
__naked void ldsx_s8_range_priv(void)
{
asm volatile (
@@ -256,6 +263,175 @@ __naked void ldsx_ctx_8(void)
: __clobber_all);
}
+SEC("syscall")
+__description("Arena LDSX Disasm")
+__success
+__arch_x86_64
+__jited("movslq 0x10(%rax,%r12), %r14")
+__jited("movswq 0x18(%rax,%r12), %r14")
+__jited("movsbq 0x20(%rax,%r12), %r14")
+__jited("movslq 0x10(%rdi,%r12), %r15")
+__jited("movswq 0x18(%rdi,%r12), %r15")
+__jited("movsbq 0x20(%rdi,%r12), %r15")
+__arch_arm64
+__jited("add x11, x7, x28")
+__jited("ldrsw x21, [x11, #0x10]")
+__jited("add x11, x7, x28")
+__jited("ldrsh x21, [x11, #0x18]")
+__jited("add x11, x7, x28")
+__jited("ldrsb x21, [x11, #0x20]")
+__jited("add x11, x0, x28")
+__jited("ldrsw x22, [x11, #0x10]")
+__jited("add x11, x0, x28")
+__jited("ldrsh x22, [x11, #0x18]")
+__jited("add x11, x0, x28")
+__jited("ldrsb x22, [x11, #0x20]")
+__naked void arena_ldsx_disasm(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = r0;"
+ "r8 = *(s32 *)(r0 + 16);"
+ "r8 = *(s16 *)(r0 + 24);"
+ "r8 = *(s8 *)(r0 + 32);"
+ "r9 = *(s32 *)(r1 + 16);"
+ "r9 = *(s16 *)(r1 + 24);"
+ "r9 = *(s8 *)(r1 + 32);"
+ "r0 = 0;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX Exception")
+__success __retval(0)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_exception(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r0 = 0xdeadbeef;"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r0 = *(s8 *)(r0 + 0);"
+ "exit;"
+ :
+ : __imm_addr(arena)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S8")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s8(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r0 + 0);"
+#else
+ "r0 = *(s8 *)(r0 + 7);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
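+
+/* The test above stores 0x3fe and sign-extend-loads its low byte (offset 0
+ * on little endian, offset 7 on big endian). A sign-extending load turns
+ * 0xfe into 0xfffffffffffffffe; the logical right shift then yields
+ * 0x7fffffffffffffff, whose low 32 bits give the expected retval of -1.
+ * A zero-extending load would instead produce 0xfe >> 1 = 0x7f and fail
+ * the check.
+ */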
+
+SEC("syscall")
+__description("Arena LDSX, S16")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s16(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r0 + 0);"
+#else
+ "r0 = *(s16 *)(r0 + 6);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+SEC("syscall")
+__description("Arena LDSX, S32")
+__success __retval(-1)
+__arch_x86_64
+__arch_arm64
+__naked void arena_ldsx_s32(void *ctx)
+{
+ asm volatile (
+ "r1 = %[arena] ll;"
+ "r2 = 0;"
+ "r3 = 1;"
+ "r4 = %[numa_no_node];"
+ "r5 = 0;"
+ "call %[bpf_arena_alloc_pages];"
+ "r0 = addr_space_cast(r0, 0x0, 0x1);"
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r0 + 0) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r0 + 0);"
+#else
+ "r0 = *(s32 *)(r0 + 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ :: __imm(bpf_arena_alloc_pages),
+ __imm_addr(arena),
+ __imm_const(numa_no_node, NUMA_NO_NODE)
+ : __clobber_all
+ );
+}
+
+/* to retain debug info for BTF generation */
+void kfunc_root(void)
+{
+ bpf_arena_alloc_pages(0, 0, 0, 0, 0);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
new file mode 100644
index 000000000000..2de105057bbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long long);
+} map SEC(".maps");
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) frame 0 insn 1 +live -24")
+__msg("(0) frame 0 insn 1 +written -8")
+__msg("(0) frame 0 insn 0 +live -8,-24")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void simple_read_simple_write(void)
+{
+ asm volatile (
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = *(u64 *)(r10 - 24);"
+ "*(u64 *)(r10 - 8) = r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 1 +live -8")
+__not_msg("(0) frame 0 insn 1 +written")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 1 +live -16")
+__msg("(0) frame 0 insn 1 +written -32")
+__msg("(0) live stack update done in 2 iterations")
+__naked void read_write_join(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 > 42 goto 1f;"
+ "r0 = *(u64 *)(r10 - 8);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "*(u64 *)(r10 - 40) = r0;"
+ "exit;"
+"1:"
+ "r0 = *(u64 *)(r10 - 16);"
+ "*(u64 *)(r10 - 32) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("2: (25) if r0 > 0x2a goto pc+1")
+__msg("7: (95) exit")
+__msg("(0) frame 0 insn 2 +written -16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("7: (95) exit")
+__not_msg("(0) frame 0 insn 2")
+__msg("(0) live stack update done in 1 iterations")
+__naked void must_write_not_same_slot(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "r1 = -8;"
+ "if r0 > 42 goto 1f;"
+ "r1 = -16;"
+"1:"
+ "r2 = r10;"
+ "r2 += r1;"
+ "*(u64 *)(r2 + 0) = r0;"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(0) frame 0 insn 0 +written -8,-16")
+__msg("(0) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 0 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void must_write_not_same_type(void)
+{
+ asm volatile (
+ "*(u64*)(r10 - 8) = 0;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "r1 = %[map] ll;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 != 0 goto 1f;"
+ "r0 = r10;"
+ "r0 += -16;"
+"1:"
+ "*(u64 *)(r0 + 0) = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("(2,4) frame 0 insn 4 +written -8")
+__msg("(2,4) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +written -8")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_write(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call write_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void write_first_param(void)
+{
+ asm volatile (
+ "*(u64 *)(r1 + 0) = 7;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+/* caller_stack_read() function */
+__msg("2: .12345.... (85) call pc+4")
+__msg("5: .12345.... (85) call pc+1")
+__msg("6: 0......... (95) exit")
+/* read_first_param() function */
+__msg("7: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("8: 0......... (95) exit")
+/* update for callsite at (2) */
+__msg("(2,7) frame 0 insn 7 +live -8")
+__msg("(2,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 2 +live -8")
+__msg("(0) live stack update done in 2 iterations")
+/* update for callsite at (5) */
+__msg("(5,7) frame 0 insn 7 +live -16")
+__msg("(5,7) live stack update done in 2 iterations")
+__msg("(0) frame 0 insn 5 +live -16")
+__msg("(0) live stack update done in 2 iterations")
+__naked void caller_stack_read(void)
+{
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "call read_first_param;"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call read_first_param;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+static __used __naked void read_first_param(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+/* read_first_param2() function */
+__msg(" 9: .1........ (79) r0 = *(u64 *)(r1 +0)")
+__msg("10: .......... (b7) r0 = 0")
+__msg("11: 0......... (05) goto pc+0")
+__msg("12: 0......... (95) exit")
+/*
+ * The purpose of the test is to check that the checkpoint in
+ * read_first_param2() stops path traversal. This will only happen if
+ * the verifier understands that fp[0]-8 at insn (12) is not alive.
+ */
+__msg("12: safe")
+__msg("processed 20 insns")
+__naked void caller_stack_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 == 42 goto 1f;"
+ "r0 = %[map] ll;"
+"1:"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ /*
+ * fp[0]-8 is either a pointer to a map or a scalar,
+ * preventing state pruning at the checkpoint created for the call.
+ */
+ "call read_first_param2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void read_first_param2(void)
+{
+ asm volatile (
+ "r0 = *(u64 *)(r1 + 0);"
+ "r0 = 0;"
+ /*
+ * Checkpoint at goto +0 should fire,
+ * as caller stack fp[0]-8 is not alive at this point.
+ */
+ "goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg("R1 type=scalar expected=map_ptr")
+__naked void caller_stack_pruning_callback(void)
+{
+ asm volatile (
+ "r0 = %[map] ll;"
+ "*(u64 *)(r10 - 8) = r0;"
+ "r1 = 2;"
+ "r2 = loop_cb ll;"
+ "r3 = r10;"
+ "r3 += -8;"
+ "r4 = 0;"
+ /*
+ * fp[0]-8 is either a pointer to a map or a scalar,
+ * preventing state pruning at the checkpoint created for the call.
+ */
+ "call %[bpf_loop];"
+ "r0 = 42;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_loop),
+ __imm_addr(map)
+ : __clobber_all);
+}
+
+static __used __naked void loop_cb(void)
+{
+ asm volatile (
+ /*
+ * Checkpoint at function entry should not fire, as caller
+ * stack fp[0]-8 is alive at this point.
+ */
+ "r6 = r2;"
+ "r1 = *(u64 *)(r6 + 0);"
+ "*(u64*)(r10 - 8) = 7;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ /*
+ * This should stop verifier on a second loop iteration,
+ * but only if verifier correctly maintains that fp[0]-8
+ * is still alive.
+ */
+ "*(u64 *)(r6 + 0) = 0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/*
+ * Because of a bug in verifier.c:compute_postorder()
+ * the program below overflowed the traversal queue in that function.
+ */
+SEC("socket")
+__naked void syzbot_postorder_bug1(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "if r0 != 0 goto -1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__failure __msg("invalid read from stack R2 off=-1024 size=8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked unsigned long caller_stack_write_tail_call(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "*(u64 *)(r10 - 8) = -8;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto 1f;"
+ "goto 2f;"
+ "1:"
+ "*(u64 *)(r10 - 8) = -1024;"
+ "2:"
+ "r1 = r6;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call write_tail_call;"
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;"
+ "r0 = *(u64 *)(r2 + 0);"
+ "exit;"
+ :: __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __used __naked unsigned long write_tail_call(void)
+{
+ asm volatile (
+ "r6 = r2;"
+ "r2 = %[map_array] ll;"
+ "r3 = 0;"
+ "call %[bpf_tail_call];"
+ "*(u64 *)(r6 + 0) = -16;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_load_acquire.c b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
new file mode 100644
index 000000000000..74f4f19c10b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_load_acquire.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("load-acquire, 8-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_8(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfe;"
+ "*(u8 *)(r10 - 1) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u8 *)(r10 - 1));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 16-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_16(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfedc;"
+ "*(u16 *)(r10 - 2) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u16 *)(r10 - 2));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 32-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_32(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0xfedcba09;"
+ "*(u32 *)(r10 - 4) = w1;"
+ ".8byte %[load_acquire_insn];" // w2 = load_acquire((u32 *)(r10 - 4));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire, 64-bit")
+__success __success_unpriv __retval(0)
+__naked void load_acquire_64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r1 = 0xfedcba0987654321 ll;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // r2 = load_acquire((u64 *)(r10 - 8));
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void load_acquire_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with non-pointer src_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void load_acquire_with_non_pointer_src_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned load-acquire")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_acquire_misaligned(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u32 *)(r10 - 5));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_10, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire from ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC loads from R1 ctx is not allowed")
+__naked void load_acquire_from_ctx_pointer(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r1 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("load-acquire from pkt pointer")
+__failure __msg("BPF_ATOMIC loads from R2 pkt is not allowed")
+__naked void load_acquire_from_pkt_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("load-acquire from flow_keys pointer")
+__failure __msg("BPF_ATOMIC loads from R2 flow_keys is not allowed")
+__naked void load_acquire_from_flow_keys_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[load_acquire_insn];" // w0 = load_acquire((u8 *)(r2 + 0));
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("load-acquire from sock pointer")
+__failure __msg("BPF_ATOMIC loads from R2 sock is not allowed")
+__naked void load_acquire_from_sock_pointer(void)
+{
+ asm volatile (
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ // w0 = load_acquire((u8 *)(r2 + offsetof(struct bpf_sock, family)));
+ ".8byte %[load_acquire_insn];"
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_2,
+ offsetof(struct bpf_sock, family)))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("load-acquire with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void load_acquire_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[load_acquire_insn];" // r0 = load_acquire((u64 *)(r15 + 0));
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, 15 /* invalid reg */, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support load-acquire, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
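
Editorial note (not part of the patch): each test above encodes BPF_LOAD_ACQ via
".8byte %[load_acquire_insn]" so it loads regardless of assembler support. As a
hedged sketch only, and assuming a toolchain new enough to emit BPF load-acquire
(see the dummy-test note above) plus the same includes as this file, the same
semantics can be expressed from C with the __atomic builtins; whether it lowers
to a load-acquire instruction depends on the backend:

        SEC("socket")
        int load_acquire_c_sketch(void *ctx)
        {
                __u64 slot = 0x1234;
                __u64 v;

                /* may compile to a BPF load-acquire when the backend supports it */
                v = __atomic_load_n(&slot, __ATOMIC_ACQUIRE);
                return v == 0x1234 ? 0 : 1;
        }
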
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
index e07b43b78fd2..fbdde80e7b90 100644
--- a/tools/testing/selftests/bpf/progs/verifier_loops1.c
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -283,4 +283,25 @@ exit_%=: \
: __clobber_all);
}
+/*
+ * This test case triggered a bug in verifier.c:maybe_exit_scc().
+ * The speculative execution path reaches a stack access instruction,
+ * stops, and triggers maybe_exit_scc() without an accompanying maybe_enter_scc() call.
+ */
+SEC("socket")
+__arch_x86_64
+__caps_unpriv(CAP_BPF)
+__naked void maybe_exit_scc_bug1(void)
+{
+ asm volatile (
+ "r0 = 100;"
+"1:"
+ /* Speculative execution path reaches and stops here. */
+ "*(u64 *)(r10 - 512) = r0;"
+ /* The condition is always false, but the verifier speculatively executes the true branch. */
+ "if r0 <= 0x0 goto 1b;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lsm.c b/tools/testing/selftests/bpf/progs/verifier_lsm.c
index 32e5e779cb96..6af9100a37ff 100644
--- a/tools/testing/selftests/bpf/progs/verifier_lsm.c
+++ b/tools/testing/selftests/bpf/progs/verifier_lsm.c
@@ -4,7 +4,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-SEC("lsm/file_alloc_security")
+SEC("lsm/file_permission")
__description("lsm bpf prog with -4095~0 retval. test 1")
__success
__naked int errno_zero_retval_test1(void *ctx)
@@ -15,7 +15,7 @@ __naked int errno_zero_retval_test1(void *ctx)
::: __clobber_all);
}
-SEC("lsm/file_alloc_security")
+SEC("lsm/file_permission")
__description("lsm bpf prog with -4095~0 retval. test 2")
__success
__naked int errno_zero_retval_test2(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
index 4eaab1468eb7..16b761e510f0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -47,7 +47,7 @@ l0_%=: r0 = 0; \
SEC("xdp")
__description("map in map state pruning")
-__success __msg("processed 26 insns")
+__success __msg("processed 15 insns")
__log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
__naked void map_in_map_state_pruning(void)
{
@@ -139,4 +139,122 @@ __naked void on_the_inner_map_pointer(void)
: __clobber_all);
}
+SEC("socket")
+__description("map_ptr is never null")
+__success
+__naked void map_ptr_is_never_null(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_in_map] ll; \
+ if r1 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner")
+__success
+__naked void map_ptr_is_never_null_inner(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ if r0 != 0 goto l0_%=; \
+ r10 = 42; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map_ptr is never null inner spill fill")
+__success
+__naked void map_ptr_is_never_null_inner_spill_fill(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64 *)(r10 -16) = r0; \
+ r1 = *(u64 *)(r10 -16); \
+ if r1 == 0 goto l1_%=; \
+ exit; \
+l1_%=: r10 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 64 * 1024);
+ });
+} rb_in_map SEC(".maps");
+
+struct rb_ctx {
+ void *rb;
+ struct bpf_dynptr dptr;
+};
+
+static __always_inline struct rb_ctx __rb_event_reserve(__u32 sz)
+{
+ struct rb_ctx rb_ctx = {};
+ void *rb;
+ __u32 cpu = bpf_get_smp_processor_id();
+ __u32 rb_slot = cpu & 1;
+
+ rb = bpf_map_lookup_elem(&rb_in_map, &rb_slot);
+ if (!rb)
+ return rb_ctx;
+
+ rb_ctx.rb = rb;
+ bpf_ringbuf_reserve_dynptr(rb, sz, 0, &rb_ctx.dptr);
+
+ return rb_ctx;
+}
+
+static __noinline void __rb_event_submit(struct rb_ctx *ctx)
+{
+ if (!ctx->rb)
+ return;
+
+ /* If the verifier (incorrectly) concludes that ctx->rb can be
+ * NULL at this point, we'll get a "BPF_EXIT instruction in main
+ * prog would lead to reference leak" error.
+ */
+ bpf_ringbuf_submit_dynptr(&ctx->dptr, 0);
+}
+
+SEC("socket")
+int map_ptr_is_never_null_rb(void *ctx)
+{
+ struct rb_ctx event_ctx = __rb_event_reserve(256);
+ __rb_event_submit(&event_ctx);
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
index 11a079145966..e2767d27d8aa 100644
--- a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -70,10 +70,13 @@ __naked void bpf_map_ptr_write_rejected(void)
: __clobber_all);
}
+/* The first element of struct bpf_map is a 32-byte SHA256 hash; accessing
+ * into this array is valid. The ops field now starts at offset 32, so the
+ * read at offset 33 below lands in ops and is rejected.
+ */
SEC("socket")
__description("bpf_map_ptr: read non-existent field rejected")
__failure
-__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__msg("cannot access ptr member ops with moff 32 in struct bpf_map with off 33 size 4")
__failure_unpriv
__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
__flag(BPF_F_ANY_ALIGNMENT)
@@ -82,7 +85,7 @@ __naked void read_non_existent_field_rejected(void)
asm volatile (" \
r6 = 0; \
r1 = %[map_array_48b] ll; \
- r6 = *(u32*)(r1 + 1); \
+ r6 = *(u32*)(r1 + 33); \
r0 = 1; \
exit; \
" :
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
new file mode 100644
index 000000000000..6d1edaef9213
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_1.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("raw_tp")
+__description("may_goto 0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_simple(void)
+{
+ asm volatile (
+ ".8byte %[may_goto];"
+ "r0 = 1;"
+ ".8byte %[may_goto];"
+ "exit;"
+ :
+ : __imm_insn(may_goto, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("batch 2 of may_goto 0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_batch_0(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto1];"
+ "r0 = 1;"
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto1];"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/1/0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: r0 = 1")
+__xlated("1: exit")
+__success
+__naked void may_goto_batch_1(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto2];"
+ ".8byte %[may_goto3];"
+ "r0 = 1;"
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto2];"
+ ".8byte %[may_goto3];"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+ __imm_insn(may_goto2, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 1 /* offset */, 0)),
+ __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__description("may_goto batch with offsets 2/0")
+__arch_x86_64
+__arch_s390x
+__arch_arm64
+__xlated("0: *(u64 *)(r10 -16) = 65535")
+__xlated("1: *(u64 *)(r10 -8) = 0")
+__xlated("2: r11 = *(u64 *)(r10 -16)")
+__xlated("3: if r11 == 0x0 goto pc+6")
+__xlated("4: r11 -= 1")
+__xlated("5: if r11 != 0x0 goto pc+2")
+__xlated("6: r11 = -16")
+__xlated("7: call unknown")
+__xlated("8: *(u64 *)(r10 -16) = r11")
+__xlated("9: r0 = 1")
+__xlated("10: r0 = 2")
+__xlated("11: exit")
+__success
+__naked void may_goto_batch_2(void)
+{
+ asm volatile (
+ ".8byte %[may_goto1];"
+ ".8byte %[may_goto3];"
+ "r0 = 1;"
+ "r0 = 2;"
+ "exit;"
+ :
+ : __imm_insn(may_goto1, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 2 /* offset */, 0)),
+ __imm_insn(may_goto3, BPF_RAW_INSN(BPF_JMP | BPF_JCOND, 0, 0, 0 /* offset */, 0))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c
new file mode 100644
index 000000000000..b891faf50660
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_may_goto_2.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+int gvar;
+
+SEC("raw_tp")
+__description("C code with may_goto 0")
+__success
+int may_goto_c_code(void)
+{
+ int i, tmp[3];
+
+ for (i = 0; i < 3 && can_loop; i++)
+ tmp[i] = 0;
+
+ for (i = 0; i < 3 && can_loop; i++)
+ tmp[i] = gvar - i;
+
+ for (i = 0; i < 3 && can_loop; i++)
+ gvar += tmp[i];
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index 994bbc346d25..a4d8814eb5ed 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -245,7 +245,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_2(void)
{
asm volatile (" \
@@ -267,7 +273,13 @@ l0_%=: \
SEC("socket")
__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
__success __retval(0)
-__failure_unpriv __msg_unpriv("frame pointer is read only")
+__success_unpriv
+#ifdef SPEC_V1
+__xlated_unpriv("w0 = 0")
+__xlated_unpriv("exit")
+__xlated_unpriv("nospec") /* inserted to prevent `frame pointer is read only` */
+__xlated_unpriv("goto pc-1")
+#endif
__naked void mov64sx_s32_varoff_3(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_mtu.c b/tools/testing/selftests/bpf/progs/verifier_mtu.c
index 70c7600a26a0..256956ea1ac5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_mtu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_mtu.c
@@ -6,7 +6,9 @@
SEC("tc/ingress")
__description("uninit/mtu: write rejected")
-__failure __msg("invalid indirect read from stack")
+__success
+__caps_unpriv(CAP_BPF|CAP_NET_ADMIN)
+__failure_unpriv __msg_unpriv("invalid read from stack")
int tc_uninit_mtu(struct __sk_buff *ctx)
{
__u32 mtu;
diff --git a/tools/testing/selftests/bpf/progs/verifier_mul.c b/tools/testing/selftests/bpf/progs/verifier_mul.c
new file mode 100644
index 000000000000..7145fe3351d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_mul.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Nandakumar Edamana */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+/* Intended to test the abstract multiplication technique(s) used by
+ * the verifier. Using assembly to avoid compiler optimizations.
+ */
+SEC("fentry/bpf_fentry_test1")
+void BPF_PROG(mul_precise, int x)
+{
+ /* First, force the verifier to be uncertain about the value:
+ * unsigned int a = (bpf_get_prandom_u32() & 0x2) | 0x1;
+ *
+ * Assuming the verifier is using tnum, a must be tnum{.v=0x1, .m=0x2}.
+ * Then a * 0x3 would be m0m1 (m for uncertain). Any added imprecision
+ * would cause the following to fail, because the required return value
+ * is 0:
+ * return (a * 0x3) & 0x4;
+ */
+ asm volatile ("\
+ call %[bpf_get_prandom_u32];\
+ r0 &= 0x2;\
+ r0 |= 0x1;\
+ r0 *= 0x3;\
+ r0 &= 0x4;\
+ if r0 != 0 goto l0_%=;\
+ r0 = 0;\
+ goto l1_%=;\
+l0_%=:\
+ r0 = 1;\
+l1_%=:\
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
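
Editorial note (not part of the patch): a small host-side check of the concrete
values behind the tnum comment above. a = (rand & 0x2) | 0x1 can only be 1 or 3,
so a * 3 is 3 (0b0011) or 9 (0b1001); bit 2 is clear in both, hence a precise
multiplication lets the verifier prove (a * 3) & 0x4 == 0:

        #include <assert.h>

        int main(void)
        {
                for (unsigned int r = 0; r < 4; r++) {
                        unsigned int a = (r & 0x2) | 0x1;   /* a is 1 or 3 */

                        assert(((a * 0x3) & 0x4) == 0);     /* 3 & 4 == 0, 9 & 4 == 0 */
                }
                return 0;
        }
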
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
index ab9f9f2620ed..e2cbc5bda65e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -79,11 +79,6 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
return NF_ACCEPT;
}
-extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
- struct bpf_dynptr *ptr__uninit) __ksym;
-extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
- void *buffer, uint32_t buffer__sz) __ksym;
-
SEC("netfilter")
__description("netfilter test prog with skb and state read access")
__success __failure_unpriv
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
index 6b564d4c0986..1fe090cd6744 100644
--- a/tools/testing/selftests/bpf/progs/verifier_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -2,6 +2,7 @@
/* Copyright (C) 2023 SUSE LLC */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
SEC("?raw_tp")
@@ -90,27 +91,74 @@ __naked int bpf_end_bswap(void)
::: __clobber_all);
}
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (db) r2 = load_acquire((u64 *)(r10 -8))")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (7b) *(u64 *)(r10 -8) = r1")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_load_acquire(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ "*(u64 *)(r10 - 8) = r1;"
+ ".8byte %[load_acquire_insn];" /* r2 = load_acquire((u64 *)(r10 - 8)); */
+ "r3 = r10;"
+ "r3 += r2;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(load_acquire_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_2, BPF_REG_10, -8))
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r2 = r10")
+__msg("mark_precise: frame0: regs=r1 stack= before 2: (79) r1 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 1: (db) store_release((u64 *)(r10 -8), r1)")
+__msg("mark_precise: frame0: regs=r1 stack= before 0: (b7) r1 = 8")
+__naked int bpf_store_release(void)
+{
+ asm volatile (
+ "r1 = 8;"
+ ".8byte %[store_release_insn];" /* store_release((u64 *)(r10 - 8), r1); */
+ "r1 = *(u64 *)(r10 - 8);"
+ "r2 = r10;"
+ "r2 += r1;" /* mark_precise */
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
#endif /* v4 instruction */
SEC("?raw_tp")
__success __log_level(2)
/*
* Without the bug fix there will be no history between "last_idx 3 first_idx 3"
- * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * and "parent state regs=" lines. "R0=6" parts are here to help anchor
* expected log messages to the one specific mark_chain_precision operation.
*
* This is quite fragile: if verifier checkpointing heuristic changes, this
* might need adjusting.
*/
-__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("2: (07) r0 += 1 ; R0=6")
__msg("3: (35) if r0 >= 0xa goto pc+1")
__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
-__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
-__msg("3: R0_w=6")
+__msg("mark_precise: frame0: parent state regs= stack=: R0=P4")
+__msg("3: R0=6")
__naked int state_loop_first_last_equal(void)
{
asm volatile (
@@ -130,4 +178,127 @@ __naked int state_loop_first_last_equal(void)
);
}
+__used __naked static void __bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r2 = 2314885393468386424 ll;"
+ "goto +0;"
+ "if r2 <= r10 goto +3;"
+ "if r1 >= -1835016 goto +0;"
+ "if r2 <= 8 goto +0;"
+ "if r3 <= 0 goto +0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (bd) if r2 <= r10 goto pc+3")
+__msg("9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("10: (b5) if r2 <= 0x8 goto pc+0")
+__msg("mark_precise: frame1: last_idx 10 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame1: regs=r2 stack= before 9: (35) if r1 >= 0xffe3fff8 goto pc+0")
+__msg("mark_precise: frame1: regs=r2 stack= before 8: (bd) if r2 <= r10 goto pc+3")
+__msg("mark_precise: frame1: regs=r2 stack= before 7: (05) goto pc+0")
+__naked void bpf_cond_op_r10(void)
+{
+ asm volatile (
+ "r3 = 0 ll;"
+ "call __bpf_cond_op_r10;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("3: (bf) r3 = r10")
+__msg("4: (bd) if r3 <= r2 goto pc+1")
+__msg("5: (b5) if r2 <= 0x8 goto pc+2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (bd) if r3 <= r2 goto pc+1")
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r3 = r10")
+__naked void bpf_cond_op_not_r10(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r2 = 2314885393468386424 ll;"
+ "r3 = r10;"
+ "if r3 <= r2 goto +1;"
+ "if r2 <= 8 goto +2;"
+ "r0 = 2 ll;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (84) w0 = -w0 ; R0=0xffffffff")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (84) w0 = -w0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_2(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed.
+ */
+ asm volatile (
+ "r0 = 1;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_3(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "w0 = -w0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__success __log_level(2)
+__msg("0: (b7) r0 = 1 ; R0=1")
+__msg("1: (87) r0 = -r0 ; R0=-1")
+__msg("mark_precise: frame0: last_idx 2 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (87) r0 = -r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (b7) r0 = 1")
+__naked int bpf_neg_4(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -1 is allowed.
+ */
+ asm volatile (
+ "r0 = 1;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("lsm.s/socket_connect")
+__failure __msg("At program exit the register R0 has")
+__naked int bpf_neg_5(void)
+{
+ /*
+ * lsm.s/socket_connect requires a return value within [-4095, 0].
+ * Returning -10000 is not allowed.
+ */
+ asm volatile (
+ "r0 = 10000;"
+ "r0 = -r0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_private_stack.c b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
index b1fbdf119553..1ecd34ebde19 100644
--- a/tools/testing/selftests/bpf/progs/verifier_private_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_private_stack.c
@@ -8,7 +8,7 @@
/* From include/linux/filter.h */
#define MAX_BPF_STACK 512
-#if defined(__TARGET_ARCH_x86)
+#if defined(__TARGET_ARCH_x86) || defined(__TARGET_ARCH_arm64)
struct elem {
struct bpf_timer t;
@@ -27,9 +27,21 @@ __description("Private stack, single prog")
__success
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x100(%r9)")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_single_prog(void)
{
asm volatile (" \
@@ -45,6 +57,9 @@ __description("No private stack")
__success
__arch_x86_64
__jited(" subq $0x8, %rsp")
+__arch_arm64
+__jited(" mov x25, sp")
+__jited(" sub sp, sp, #0x10")
__naked void no_private_stack_nested(void)
{
asm volatile (" \
@@ -74,13 +89,26 @@ __success
__arch_x86_64
/* private stack fp for the main prog */
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" movl $0x2a, %edi")
__jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq 0x{{.*}}")
__jited(" popq %r9")
__jited(" xorl %eax, %eax")
+__arch_arm64
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl {{.*}}")
+__jited("...")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_nested_1(void)
{
asm volatile (" \
@@ -122,7 +150,7 @@ __jited(" pushq %rbp")
__jited(" movq %rsp, %rbp")
__jited(" endbr64")
__jited(" movabsq $0x{{.*}}, %r9")
-__jited(" addq %gs:0x{{.*}}, %r9")
+__jited(" addq %gs:{{.*}}, %r9")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
@@ -131,6 +159,24 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited("func #1")
+__jited("...")
+__jited(" stp x25, x27, [sp, {{.*}}]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" mov x7, #0x0")
+__jited(" ldp x25, x27, [sp], {{.*}}")
__naked void private_stack_callback(void)
{
asm volatile (" \
@@ -154,6 +200,28 @@ __arch_x86_64
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x29, x30, [sp, #-0x10]!")
+__jited(" mov x29, sp")
+__jited(" stp xzr, x26, [sp, #-0x10]!")
+__jited(" mov x26, sp")
+__jited(" stp x19, x20, [sp, #-0x10]!")
+__jited(" stp x21, x22, [sp, #-0x10]!")
+__jited(" stp x23, x24, [sp, #-0x10]!")
+__jited(" stp x25, x26, [sp, #-0x10]!")
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" mov x0, #0x0")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_main_prog(void)
{
asm volatile (" \
@@ -179,6 +247,19 @@ __jited(" movq %rdi, -0x200(%r9)")
__jited(" pushq %r9")
__jited(" callq")
__jited(" popq %r9")
+__arch_arm64
+__jited(" stp x27, x28, [sp, #-0x10]!")
+__jited(" mov x27, {{.*}}")
+__jited(" movk x27, {{.*}}, lsl #16")
+__jited(" movk x27, {{.*}}")
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
+__jited(" mov x0, #0x2a")
+__jited(" str x0, [x27]")
+__jited(" bl 0x{{.*}}")
+__jited(" add x7, x0, #0x0")
+__jited(" ldp x27, x28, [sp], #0x10")
int private_stack_exception_sub_prog(void)
{
asm volatile (" \
@@ -220,6 +301,10 @@ __description("Private stack, async callback, not nested")
__success __retval(0)
__arch_x86_64
__jited(" movabsq $0x{{.*}}, %r9")
+__arch_arm64
+__jited(" mrs x10, TPIDR_EL{{[0-1]}}")
+__jited(" add x27, x27, x10")
+__jited(" add x25, x27, {{.*}}")
int private_stack_async_callback_1(void)
{
struct bpf_timer *arr_timer;
@@ -241,6 +326,8 @@ __description("Private stack, async callback, potential nesting")
__success __retval(0)
__arch_x86_64
__jited(" subq $0x100, %rsp")
+__arch_arm64
+__jited(" sub sp, sp, #0x100")
int private_stack_async_callback_2(void)
{
struct bpf_timer *arr_timer;
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
index 7cc83acac727..c689665e07b9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
@@ -236,7 +236,7 @@ __naked void load_bytes_spilled_regs_data(void)
SEC("tc")
__description("raw_stack: skb_load_bytes, invalid access 1")
-__failure __msg("invalid indirect access to stack R3 off=-513 size=8")
+__failure __msg("invalid write to stack R3 off=-513 size=8")
__naked void load_bytes_invalid_access_1(void)
{
asm volatile (" \
@@ -255,7 +255,7 @@ __naked void load_bytes_invalid_access_1(void)
SEC("tc")
__description("raw_stack: skb_load_bytes, invalid access 2")
-__failure __msg("invalid indirect access to stack R3 off=-1 size=8")
+__failure __msg("invalid write to stack R3 off=-1 size=8")
__naked void load_bytes_invalid_access_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
index 683a882b3e6d..910365201f68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
+++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
@@ -27,7 +27,7 @@ struct bpf_key {} __attribute__((preserve_access_index));
extern void bpf_key_put(struct bpf_key *key) __ksym;
extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
-extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__s32 serial, __u64 flags) __ksym;
/* BTF FUNC records are not generated for kfuncs referenced
* from inline assembly. These records are necessary for
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
index 7c5e5e6d10eb..c0ce690ddb68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -349,11 +349,11 @@ __naked void precision_two_ids(void)
SEC("socket")
__success __log_level(2)
__flag(BPF_F_TEST_STATE_FREQ)
-/* check thar r0 and r6 have different IDs after 'if',
+/* check that r0 and r6 have different IDs after 'if',
* collect_linked_regs() can't tie more than 6 registers for a single insn.
*/
__msg("8: (25) if r0 > 0x7 goto pc+0 ; R0=scalar(id=1")
-__msg("9: (bf) r6 = r6 ; R6_w=scalar(id=2")
+__msg("9: (bf) r6 = r6 ; R6=scalar(id=2")
/* check that r{0-5} are marked precise after 'if' */
__msg("frame0: regs=r0 stack= before 8: (25) if r0 > 0x7 goto pc+0")
__msg("frame0: parent state regs=r0,r1,r2,r3,r4,r5 stack=:")
@@ -779,12 +779,12 @@ __success
__retval(0)
/* Check that verifier believes r1/r0 are zero at exit */
__log_level(2)
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
__msg("from 3 to 4")
-__msg("4: (77) r1 >>= 32 ; R1_w=0")
-__msg("5: (bf) r0 = r1 ; R0_w=0 R1_w=0")
+__msg("4: (77) r1 >>= 32 ; R1=0")
+__msg("5: (bf) r0 = r1 ; R0=0 R1=0")
__msg("6: (95) exit")
/* Verify that statements to randomize upper half of r1 had not been
* generated.
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
index d3e70e38e442..a2132c72d3b8 100644
--- a/tools/testing/selftests/bpf/progs/verifier_sock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -1,14 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Converted from tools/testing/selftests/bpf/verifier/sock.c */
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
-#define offsetofend(TYPE, MEMBER) \
- (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
-
struct {
__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
__uint(max_entries, 1);
@@ -50,6 +46,13 @@ struct {
__uint(map_flags, BPF_F_NO_PREALLOC);
} sk_storage_map SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
SEC("cgroup/skb")
__description("skb->sk: no NULL check")
__failure __msg("invalid mem access 'sock_common_or_null'")
@@ -1037,4 +1040,130 @@ __naked void sock_create_read_src_port(void)
: __clobber_all);
}
+__noinline
+long skb_pull_data2(struct __sk_buff *sk, __u32 len)
+{
+ return bpf_skb_pull_data(sk, len);
+}
+
+__noinline
+long skb_pull_data1(struct __sk_buff *sk, __u32 len)
+{
+ return skb_pull_data2(sk, len);
+}
+
+/* A global function calls bpf_skb_pull_data(), which invalidates packet
+ * pointers established before the global function call.
+ */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_from_global_func(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ skb_pull_data1(sk, 0);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
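
Editorial note (not part of the patch): the test above shows the unsafe pattern
the verifier rejects. As a hedged sketch only, assuming the same includes and
the skb_pull_data1() helper defined above, the conventional safe pattern is to
re-derive and re-check the packet pointers after any call that may invalidate
them:

        SEC("tc")
        int revalidate_pkt_pointers_after_global_func(struct __sk_buff *sk)
        {
                int *p;

                skb_pull_data1(sk, 0);
                /* re-load data/data_end after the call, before dereferencing */
                p = (void *)(long)sk->data;
                if ((void *)(p + 1) > (void *)(long)sk->data_end)
                        return TCX_DROP;
                *p = 42; /* now safe */
                return TCX_PASS;
        }
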
+
+__noinline
+long xdp_pull_data2(struct xdp_md *x, __u32 len)
+{
+ return bpf_xdp_pull_data(x, len);
+}
+
+__noinline
+long xdp_pull_data1(struct xdp_md *x, __u32 len)
+{
+ return xdp_pull_data2(x, len);
+}
+
+/* A global function calls bpf_xdp_pull_data(), which invalidates packet
+ * pointers established before the global function call.
+ */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers_from_global_func(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ xdp_pull_data1(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+/* XDP packet changing kfunc calls invalidate packet pointers */
+SEC("xdp")
+__failure __msg("invalid mem access")
+int invalidate_xdp_pkt_pointers(struct xdp_md *x)
+{
+ int *p = (void *)(long)x->data;
+
+ if ((void *)(p + 1) > (void *)(long)x->data_end)
+ return XDP_DROP;
+ bpf_xdp_pull_data(x, 0);
+ *p = 42; /* this is unsafe */
+ return XDP_PASS;
+}
+
+__noinline
+int tail_call(struct __sk_buff *sk)
+{
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ return 0;
+}
+
+static __noinline
+int static_tail_call(struct __sk_buff *sk)
+{
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ return 0;
+}
+
+/* Tail calls in sub-programs invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_global_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ tail_call(sk);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
+
+/* Tail calls in static sub-programs invalidate packet pointers. */
+SEC("tc")
+__failure __msg("invalid mem access")
+int invalidate_pkt_pointers_by_static_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ static_tail_call(sk);
+ *p = 42; /* this is unsafe */
+ return TCX_PASS;
+}
+
+/* Direct tail calls do not invalidate packet pointers. */
+SEC("tc")
+__success
+int invalidate_pkt_pointers_by_tail_call(struct __sk_buff *sk)
+{
+ int *p = (void *)(long)sk->data;
+
+ if ((void *)(p + 1) > (void *)(long)sk->data_end)
+ return TCX_DROP;
+ bpf_tail_call_static(sk, &jmp_table, 0);
+ *p = 42; /* this is NOT unsafe: tail calls don't return */
+ return TCX_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
index 671d9f415dbf..7a13dbd794b2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -506,17 +506,17 @@ SEC("raw_tp")
__log_level(2)
__success
/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
-__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8=0")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0=0 R10=fp0 fp-16=0")
/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
* precise immediately; if necessary, it will be marked precise later
*/
-__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0")
/* similarly, when R2 is assigned from spilled register, it is initially
* imprecise, but will be marked precise later once it is used in precise context
*/
-__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0")
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2=0 R10=fp0 fp-16=0")
__msg("11: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
@@ -598,7 +598,7 @@ __log_level(2)
__success
/* fp-4 is STACK_ZERO */
__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
-__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2=0 R10=fp0 fp-8=0000????")
__msg("5: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
@@ -640,25 +640,25 @@ SEC("raw_tp")
__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
__success
/* make sure fp-8 is IMPRECISE fake register spill */
-__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1")
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8=1")
/* and fp-16 is spilled IMPRECISE const reg */
-__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1")
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1")
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2=1 R10=fp0 fp-8=1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1")
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2=1 R10=fp0 fp-16=1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
@@ -668,12 +668,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
/* now both fp-8 and fp-16 are precise, very good */
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=P1 fp-16=P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision(void)
{
asm volatile (
@@ -719,22 +719,22 @@ __success
/* make sure fp-8 is 32-bit FAKE subregister spill */
__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
/* but fp-16 is spilled IMPRECISE zero const reg */
-__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1")
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0=1 R10=fp0 fp-16=????1")
/* validate load from fp-8, which was initialized using BPF_ST_MEM */
-__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1")
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2=1 R10=fp0 fp-8=????1")
__msg("9: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
-__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("10: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
/* validate load from fp-16, which was initialized using BPF_STX_MEM */
-__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1")
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2=1 R10=fp0 fp-16=????1")
__msg("13: (0f) r1 += r2")
__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
@@ -743,12 +743,12 @@ __msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2
__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
-__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0=1 R1=ctx() R6=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8=????P1 fp-16=????P1")
__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
-__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__msg("14: R1=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2=1")
__naked void stack_load_preserves_const_precision_subreg(void)
{
asm volatile (
@@ -1244,4 +1244,39 @@ __naked void old_stack_misc_vs_cur_ctx_ptr(void)
: __clobber_all);
}
+SEC("socket")
+__description("stack_noperfmon: reject read of invalid slots")
+__success
+__caps_unpriv(CAP_BPF)
+__failure_unpriv __msg_unpriv("invalid read from stack off -8+1 size 8")
+__naked void stack_noperfmon_reject_invalid_read(void)
+{
+ asm volatile (" \
+ r2 = 1; \
+ r6 = r10; \
+ r6 += -8; \
+ *(u8 *)(r6 + 0) = r2; \
+ r2 = *(u64 *)(r6 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("stack_noperfmon: narrow spill onto 64-bit scalar spilled slots")
+__success
+__caps_unpriv(CAP_BPF)
+__success_unpriv
+__naked void stack_noperfmon_spill_32bit_onto_64bit_slot(void)
+{
+ asm volatile(" \
+ r0 = 0; \
+ *(u64 *)(r10 - 8) = r0; \
+ *(u32 *)(r10 - 8) = r0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
index 3f679de73229..d9d7b05cf6d2 100644
--- a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
@@ -187,7 +187,7 @@ l0_%=: r6 = r0; \
SEC("cgroup/skb")
__description("spin_lock: test6 missing unlock")
-__failure __msg("BPF_EXIT instruction cannot be used inside bpf_spin_lock-ed region")
+__failure __msg("BPF_EXIT instruction in main prog cannot be used inside bpf_spin_lock-ed region")
__failure_unpriv __msg_unpriv("")
__naked void spin_lock_test6_missing_unlock(void)
{
@@ -530,4 +530,30 @@ l1_%=: exit; \
: __clobber_all);
}
+SEC("tc")
+__description("spin_lock: loop within a locked region")
+__success __failure_unpriv __msg_unpriv("")
+__retval(0)
+int bpf_loop_inside_locked_region(void)
+{
+ const int zero = 0;
+ struct val *val;
+ int i, j = 0;
+
+ val = bpf_map_lookup_elem(&map_spin_lock, &zero);
+ if (!val)
+ return -1;
+
+ bpf_spin_lock(&val->l);
+ bpf_for(i, 0, 10) {
+ j++;
+ /* Silence "unused variable" warnings. */
+ if (j == 10)
+ break;
+ }
+ bpf_spin_unlock(&val->l);
+
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
index 417c61cd4b19..24aabc6083fd 100644
--- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -481,4 +481,56 @@ l1_%=: r0 = 42; \
: __clobber_all);
}
+SEC("socket")
+__description("PTR_TO_STACK stack size > 512")
+__failure __msg("invalid write to stack R1 off=-520 size=8")
+__naked void stack_check_size_gt_512(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -520; \
+ r0 = 42; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#ifdef __BPF_FEATURE_MAY_GOTO
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto with jit")
+__load_if_JITed()
+__success __retval(42)
+__naked void stack_check_size_512_with_may_goto_jit(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK stack size 512 with may_goto without jit")
+__load_if_no_JITed()
+__failure __msg("stack size 520(extra 8) is too large")
+__naked void stack_check_size_512_with_may_goto(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u32*)(r1 + 0) = r0; \
+ may_goto l0_%=; \
+ r2 = 100; \
+ l0_%=: \
+ exit; \
+" ::: __clobber_all);
+}
+#endif
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_store_release.c b/tools/testing/selftests/bpf/progs/verifier_store_release.c
new file mode 100644
index 000000000000..72f1eb006074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_store_release.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__description("store-release, 8-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_8(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x12;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r10 - 1), w1);
+ "w2 = *(u8 *)(r10 - 1);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 16-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_16(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x1234;"
+ ".8byte %[store_release_insn];" // store_release((u16 *)(r10 - 2), w1);
+ "w2 = *(u16 *)(r10 - 2);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_H, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -2))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 32-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_32(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "w1 = 0x12345678;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 4), w1);
+ "w2 = *(u32 *)(r10 - 4);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -4))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, 64-bit")
+__success __success_unpriv __retval(0)
+__naked void store_release_64(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "r1 = 0x1234567890abcdef ll;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r2 = *(u64 *)(r10 - 8);"
+ "if r2 == r1 goto 1f;"
+ "r0 = 1;"
+"1:"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized src_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_src_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r2);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with uninitialized dst_reg")
+__failure __failure_unpriv __msg("R2 !read_ok")
+__naked void store_release_with_uninitialized_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r2 - 8), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_2, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with non-pointer dst_reg")
+__failure __failure_unpriv __msg("R1 invalid mem access 'scalar'")
+__naked void store_release_with_non_pointer_dst_reg(void)
+{
+ asm volatile (
+ "r1 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r1 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_1, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned store-release")
+__failure __failure_unpriv __msg("misaligned stack access off")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void store_release_misaligned(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ ".8byte %[store_release_insn];" // store_release((u32 *)(r10 - 5), w0);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_W, BPF_STORE_REL, BPF_REG_10, BPF_REG_0, -5))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release to ctx pointer")
+__failure __failure_unpriv __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void store_release_to_ctx_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ // store_release((u8 *)(r1 + offsetof(struct __sk_buff, cb[0])), w0);
+ ".8byte %[store_release_insn];"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, cb[0])))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("store-release to pkt pointer")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__naked void store_release_to_pkt_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u32 *)(r1 + %[xdp_md_data]);"
+ "r3 = *(u32 *)(r1 + %[xdp_md_data_end]);"
+ "r1 = r2;"
+ "r1 += 8;"
+ "if r1 >= r3 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+"l0_%=: r0 = 0;"
+ "exit;"
+ :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("store-release to flow_keys pointer")
+__failure __msg("BPF_ATOMIC stores into R2 flow_keys is not allowed")
+__naked void store_release_to_flow_keys_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[__sk_buff_flow_keys]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(__sk_buff_flow_keys,
+ offsetof(struct __sk_buff, flow_keys)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("store-release to sock pointer")
+__failure __msg("R2 cannot write into sock")
+__naked void store_release_to_sock_pointer(void)
+{
+ asm volatile (
+ "w0 = 0;"
+ "r2 = *(u64 *)(r1 + %[sk_reuseport_md_sk]);"
+ ".8byte %[store_release_insn];" // store_release((u8 *)(r2 + 0), w0);
+ "exit;"
+ :
+ : __imm_const(sk_reuseport_md_sk, offsetof(struct sk_reuseport_md, sk)),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_B, BPF_STORE_REL, BPF_REG_2, BPF_REG_0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release, leak pointer to stack")
+__success __success_unpriv __retval(0)
+__naked void store_release_leak_pointer_to_stack(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r10 - 8), r1);
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_1, -8))
+ : __clobber_all);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("store-release, leak pointer to map")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("R6 leaks addr into map")
+__naked void store_release_leak_pointer_to_map(void)
+{
+ asm volatile (
+ "r6 = r1;"
+ "r1 = %[map_hash_8b] ll;"
+ "r2 = 0;"
+ "*(u64 *)(r10 - 8) = r2;"
+ "r2 = r10;"
+ "r2 += -8;"
+ "call %[bpf_map_lookup_elem];"
+ "if r0 == 0 goto l0_%=;"
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r0 + 0), r6);
+"l0_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(map_hash_8b),
+ __imm(bpf_map_lookup_elem),
+ __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_0, BPF_REG_6, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("store-release with invalid register R15")
+__failure __failure_unpriv __msg("R15 is invalid")
+__naked void store_release_with_invalid_reg(void)
+{
+ asm volatile (
+ ".8byte %[store_release_insn];" // store_release((u64 *)(r15 + 0), r1);
+ "exit;"
+ :
+ : __imm_insn(store_release_insn,
+ BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, 15 /* invalid reg */, BPF_REG_1, 0))
+ : __clobber_all);
+}
+
+#else /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+SEC("socket")
+__description("Clang version < 18, ENABLE_ATOMICS_TESTS not defined, and/or JIT doesn't support store-release, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */
+
+char _license[] SEC("license") = "GPL";
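A note on the encoding used throughout this file: inline BPF assembly has no store-release mnemonic, so every test splices a hand-built instruction in through __imm_insn() and a .8byte literal. The sketch below spells out what BPF_ATOMIC_OP(BPF_DW, BPF_STORE_REL, BPF_REG_10, BPF_REG_2, -8) builds for the first test, assuming the BPF_ATOMIC_OP macro from tools/include/linux/filter.h; the variable name is illustrative.

	/* Store-release of r2 to fp-8, written out as a struct bpf_insn.
	 * The imm field carries the atomic-op selector (BPF_STORE_REL);
	 * code, dst_reg, src_reg and off are the usual BPF_STX fields.
	 */
	struct bpf_insn store_rel = {
		.code    = BPF_STX | BPF_SIZE(BPF_DW) | BPF_ATOMIC,
		.dst_reg = BPF_REG_10,		/* address base: frame pointer */
		.src_reg = BPF_REG_2,		/* value to store */
		.off     = -8,			/* fp-8 */
		.imm     = BPF_STORE_REL,	/* atomic-op selector */
	};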
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 9d415f7ce599..61886ed554de 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -105,7 +105,7 @@ __msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int fp_precise_subprog_result(void)
{
asm volatile (
@@ -141,7 +141,7 @@ __msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
* anyways, at which point we'll break precision chain
*/
__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
-__msg("7: R0_w=scalar")
+__msg("7: R0=scalar")
__naked int sneaky_fp_precise_subprog_result(void)
{
asm volatile (
@@ -681,7 +681,7 @@ __msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
-__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0=2 R6=1 R8=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8=P1")
__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
@@ -793,4 +793,57 @@ __naked int stack_slot_aliases_precision(void)
);
}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+__naked __noinline __used
+static unsigned long identity_tail_call(void)
+{
+ /* the simplest identity function involving a tail call */
+ asm volatile (
+ "r6 = r2;"
+ "r2 = %[map_array] ll;"
+ "r3 = 0;"
+ "call %[bpf_tail_call];"
+ "r0 = r6;"
+ "exit;"
+ :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("13: (85) call bpf_tail_call#12")
+__msg("mark_precise: frame1: last_idx 13 first_idx 0 subseq_idx -1 ")
+__msg("returning from callee:")
+__msg("frame1: R0=scalar() R6=3 R10=fp0")
+__msg("to caller at 4:")
+__msg("R0=scalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: parent state regs=r0 stack=: R0=Pscalar() R6=map_value(map=.data.vals,ks=4,vs=16) R10=fp0")
+__msg("math between map_value pointer and register with unbounded min value is not allowed")
+__naked int subprog_result_tail_call(void)
+{
+ asm volatile (
+ "r2 = 3;"
+ "call identity_tail_call;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
char _license[] SEC("license") = "GPL";
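For readers less used to __naked assembly, a rough C equivalent of the identity_tail_call subprogram added above is sketched below; the _c suffix on the name is hypothetical, and the fall-through on an empty prog_array slot is the documented bpf_tail_call() behaviour the test relies on.

	static __noinline __u64 identity_tail_call_c(void *ctx, __u64 x)
	{
		/* Slot 0 of map_array is never populated, so bpf_tail_call()
		 * fails and falls through, and the subprogram returns its
		 * argument unchanged. The caller multiplies that return value
		 * by 4 and adds it to a map_value pointer, which the verifier
		 * rejects with the unbounded-min-value message quoted in the
		 * __msg() annotations above.
		 */
		bpf_tail_call(ctx, &map_array, 0);
		return x;
	}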
diff --git a/tools/testing/selftests/bpf/progs/verifier_tailcall.c b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
new file mode 100644
index 000000000000..b4acce60fb9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_tailcall.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} map_array SEC(".maps");
+
+SEC("socket")
+__description("invalid map type for tail call")
+__failure __msg("expected prog array map for tail call")
+__failure_unpriv
+__naked void invalid_map_for_tail_call(void)
+{
+ asm volatile (" \
+ r2 = %[map_array] ll; \
+ r3 = 0; \
+ call %[bpf_tail_call]; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_array)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
index 7ea535bfbacd..28b4f7035ceb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_unpriv.c
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -199,7 +199,7 @@ __naked void pass_pointer_to_helper_function(void)
SEC("socket")
__description("unpriv: indirectly pass pointer on stack to helper function")
__success __failure_unpriv
-__msg_unpriv("invalid indirect read from stack R2 off -8+0 size 8")
+__msg_unpriv("invalid read from stack R2 off -8+0 size 8")
__retval(0)
__naked void on_stack_to_helper_function(void)
{
@@ -572,8 +572,14 @@ l0_%=: exit; \
SEC("socket")
__description("alu32: mov u32 const")
-__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'")
+__success __success_unpriv
__retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 == 0x0 goto pc+2")
+__xlated_unpriv("nospec") /* inserted to prevent `R7 invalid mem access 'scalar'` */
+__xlated_unpriv("goto pc-1") /* sanitized dead code */
+__xlated_unpriv("exit")
+#endif
__naked void alu32_mov_u32_const(void)
{
asm volatile (" \
@@ -619,12 +625,11 @@ __naked void pass_pointer_to_tail_call(void)
SEC("socket")
__description("unpriv: cmp map pointer with zero")
-__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__success __success_unpriv
__retval(0)
__naked void cmp_map_pointer_with_zero(void)
{
asm volatile (" \
- r1 = 0; \
r1 = %[map_hash_8b] ll; \
if r1 == 0 goto l0_%=; \
l0_%=: r0 = 0; \
@@ -635,6 +640,22 @@ l0_%=: r0 = 0; \
}
SEC("socket")
+__description("unpriv: cmp map pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison prohibited")
+__retval(0)
+__naked void cmp_map_pointer_with_const(void)
+{
+ asm volatile (" \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0x0000beef goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("unpriv: write into frame pointer")
__failure __msg("frame pointer is read only")
__failure_unpriv
@@ -723,4 +744,210 @@ l0_%=: r0 = 0; \
" ::: __clobber_all);
}
+SEC("socket")
+__description("unpriv: Spectre v1 path-based type confusion of scalar as stack-ptr")
+__success __success_unpriv __retval(0)
+#ifdef SPEC_V1
+__xlated_unpriv("if r0 != 0x1 goto pc+2")
+/* This nospec prevents the exploit because it forces the mispredicted (not
+ * taken) `if r0 != 0x0 goto l0_%=` to resolve before using r6 as a pointer.
+ * This causes the CPU to realize that `r6 = r9` should have never executed. It
+ * ensures that r6 always contains a readable stack slot ptr when the insn after
+ * the nospec executes.
+ */
+__xlated_unpriv("nospec")
+__xlated_unpriv("r9 = *(u8 *)(r6 +0)")
+#endif
+__naked void unpriv_spec_v1_type_confusion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* r0: pointer to a map array entry */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ /* r1, r2: prepared call args */ \
+ r6 = r10; \
+ r6 += -8; \
+ /* r6: pointer to readable stack slot */ \
+ r9 = 0xffffc900; \
+ r9 <<= 32; \
+ /* r9: scalar controlled by attacker */ \
+ r0 = *(u64 *)(r0 + 0); /* cache miss */ \
+ if r0 != 0x0 goto l0_%=; \
+ r6 = r9; \
+l0_%=: if r0 != 0x1 goto l1_%=; \
+ r9 = *(u8 *)(r6 + 0); \
+l1_%=: /* leak r9 */ \
+ r9 &= 1; \
+ r9 <<= 9; \
+ *(u64*)(r10 - 8) = r9; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ /* leak secret into is_cached(map[0|512]): */ \
+ r0 = *(u64 *)(r0 + 0); \
+l2_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V4
+__xlated_unpriv("r1 = 0x2020200005642020") /* should not matter */
+__xlated_unpriv("*(u64 *)(r10 -8) = r1")
+__xlated_unpriv("nospec")
+#endif
+__naked void unpriv_ldimm64_spectre_v4(void)
+{
+ asm volatile (" \
+ r1 = 0x2020200005642020 ll; \
+ *(u64 *)(r10 -8) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+/* starts with r0 == r8 == r9 == 0 */
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("if r9 == 0x0 goto pc+4")
+__xlated_unpriv("r2 = r0")
+/* The following nospec is required to prevent the dangerous
+ * `*(u64 *)(NOT_FP -64) = r1` from executing speculatively when the earlier
+ * `if r9 == 0 goto pc+4` is mispredicted because of Spectre v1. The test
+ * therefore ensures that the Spectre-v4-induced nospec does not prevent the
+ * Spectre-v1-induced speculative path from being fully analyzed.
+ */
+__xlated_unpriv("nospec") /* Spectre v1 */
+__xlated_unpriv("*(u64 *)(r2 -64) = r1") /* could be used to leak r2 */
+__xlated_unpriv("nospec") /* Spectre v4 */
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+2")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: ldimm64 before Spectre v1 and v4 barrier (simple)")
+__success __success_unpriv
+__retval(0)
+#ifdef SPEC_V1
+#ifdef SPEC_V4
+__xlated_unpriv("if r8 != 0x0 goto pc+1")
+__xlated_unpriv("goto pc+4")
+__xlated_unpriv("goto pc-1") /* if r9 == 0 goto l3_%= */
+__xlated_unpriv("goto pc-1") /* r2 = r0 */
+__xlated_unpriv("goto pc-1") /* r1 = 0x2020200005642020 ll */
+__xlated_unpriv("goto pc-1") /* second part of ldimm64 */
+__xlated_unpriv("nospec")
+__xlated_unpriv("*(u64 *)(r2 -64) = r1")
+__xlated_unpriv("nospec")
+#endif
+#endif
+__naked void unpriv_ldimm64_spectre_v1_and_v4_simple(void)
+{
+ asm volatile (" \
+ r8 = 0; \
+ r9 = 0; \
+ r0 = r10; \
+ r1 = 0; \
+ r2 = r10; \
+ if r8 != 0 goto l0_%=; \
+ if r9 != 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r8 != 0 goto l1_%=; \
+ goto l2_%=; \
+l1_%=: if r9 == 0 goto l3_%=; \
+ r2 = r0; \
+ r1 = 0x2020200005642020 ll; \
+l2_%=: *(u64 *)(r2 -64) = r1; \
+l3_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
index a9ab37d3b9e2..2129e4353fd9 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
#include "bpf_misc.h"
#define MAX_ENTRIES 11
@@ -146,6 +147,24 @@ l0_%=: exit; \
: __clobber_all);
}
+SEC("socket")
+__description("map_ptr illegal alu op, map_ptr = -map_ptr")
+__failure __msg("R0 invalid mem access 'scalar'")
+__failure_unpriv __msg_unpriv("R0 pointer arithmetic prohibited")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void map_ptr_illegal_alu_op(void)
+{
+ asm volatile (" \
+ r0 = %[map_hash_48b] ll; \
+ r0 = -r0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
SEC("flow_dissector")
__description("flow_keys illegal alu op with variable offset")
__failure __msg("R7 pointer arithmetic on flow_keys prohibited")
@@ -165,4 +184,32 @@ __naked void flow_keys_illegal_variable_offset_alu(void)
: __clobber_all);
}
+#define DEFINE_BAD_OFFSET_TEST(name, op, off, imm) \
+ SEC("socket") \
+ __failure __msg("BPF_ALU uses reserved fields") \
+ __naked void name(void) \
+ { \
+ asm volatile( \
+ "r0 = 1;" \
+ ".8byte %[insn];" \
+ "r0 = 0;" \
+ "exit;" \
+ : \
+ : __imm_insn(insn, BPF_RAW_INSN((op), 0, 0, (off), (imm))) \
+ : __clobber_all); \
+ }
+
+/*
+ * An offset field of 0 or 1 is legal for BPF_{DIV,MOD} instructions.
+ * An offset field of 0 is legal for the remaining ALU instructions.
+ * Check that an error is reported for illegal offsets; tests for the
+ * legal offsets are assumed to exist elsewhere.
+ */
+DEFINE_BAD_OFFSET_TEST(bad_offset_divx, BPF_ALU64 | BPF_DIV | BPF_X, -1, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_modk, BPF_ALU64 | BPF_MOD | BPF_K, -1, 1)
+DEFINE_BAD_OFFSET_TEST(bad_offset_addx, BPF_ALU64 | BPF_ADD | BPF_X, -1, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_divx2, BPF_ALU64 | BPF_DIV | BPF_X, 2, 0)
+DEFINE_BAD_OFFSET_TEST(bad_offset_modk2, BPF_ALU64 | BPF_MOD | BPF_K, 2, 1)
+DEFINE_BAD_OFFSET_TEST(bad_offset_addx2, BPF_ALU64 | BPF_ADD | BPF_X, 1, 0)
+
char _license[] SEC("license") = "GPL";
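For contrast with the DEFINE_BAD_OFFSET_TEST cases above, a legal-offset counterpart could look like the sketch below. It is illustrative only: it assumes the ISA rule that offset 1 on BPF_DIV selects the signed variant (sdiv), reuses the same raw-instruction splicing style, and uses a hypothetical test name; actually running it also assumes a kernel and JIT that accept signed division.

	SEC("socket")
	__description("legal offset 1 selects signed division")
	__success
	__naked void good_offset_sdiv(void)
	{
		asm volatile(
			"r0 = 4;"
			".8byte %[insn];"	/* signed r0 /= r0; offset 1 is legal for DIV */
			"r0 = 0;"
			"exit;"
			:
			: __imm_insn(insn, BPF_RAW_INSN(BPF_ALU64 | BPF_DIV | BPF_X, 0, 0, 1, 0))
			: __clobber_all);
	}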
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
index 5ba6e53571c8..af7938ce56cb 100644
--- a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -231,6 +231,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_lt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -245,11 +249,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -259,7 +263,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -271,6 +276,10 @@ __retval(1)
__naked void ptr_unknown_vs_unknown_gt(void)
{
asm volatile (" \
+ r8 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r9 = r0; \
+ r1 = r8; \
r0 = *(u32*)(r1 + %[__sk_buff_len]); \
r1 = 0; \
*(u64*)(r10 - 8) = r1; \
@@ -285,11 +294,11 @@ l1_%=: call %[bpf_map_lookup_elem]; \
r4 = *(u8*)(r0 + 0); \
if r4 == 1 goto l3_%=; \
r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x7; \
goto l4_%=; \
l3_%=: r1 = 6; \
- r1 = -r1; \
+ r1 = r9; \
r1 &= 0x3; \
l4_%=: r1 += r0; \
r0 = *(u8*)(r1 + 0); \
@@ -299,7 +308,8 @@ l2_%=: r0 = 1; \
: __imm(bpf_map_lookup_elem),
__imm_addr(map_array_48b),
__imm_addr(map_hash_16b),
- __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm(bpf_get_prandom_u32)
: __clobber_all);
}
@@ -398,7 +408,8 @@ l2_%=: r0 = 1; \
SEC("socket")
__description("map access: mixing value pointer and scalar, 1")
-__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_1(void)
{
@@ -433,6 +444,7 @@ l2_%=: /* common instruction */ \
l3_%=: /* branch B */ \
r0 = 0x13371337; \
/* verifier follows fall-through */ \
+ /* unpriv: nospec (inserted to prevent `R2 pointer comparison prohibited`) */\
if r2 != 0x100000 goto l4_%=; \
r0 = 0; \
exit; \
@@ -450,7 +462,8 @@ l4_%=: /* fake-dead code; targeted from branch A to \
SEC("socket")
__description("map access: mixing value pointer and scalar, 2")
-__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__success __failure_unpriv
+__msg_unpriv("R2 tried to add from different maps, paths or scalars, pointer arithmetic with it prohibited for !root")
__retval(0)
__naked void value_pointer_and_scalar_2(void)
{
@@ -492,6 +505,7 @@ l4_%=: /* fake-dead code; targeted from branch A to \
* prevent dead code sanitization, rejected \
* via branch B however \
*/ \
+ /* unpriv: nospec (inserted to prevent `R0 invalid mem access 'scalar'`) */\
r0 = *(u8*)(r0 + 0); \
r0 = 0; \
exit; \
@@ -1296,9 +1310,13 @@ l0_%=: r0 = 1; \
SEC("socket")
__description("map access: value_ptr -= unknown scalar, 2")
-__success __failure_unpriv
-__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__success __success_unpriv
__retval(1)
+#ifdef SPEC_V1
+__xlated_unpriv("r1 &= 7")
+__xlated_unpriv("nospec") /* inserted to prevent `R0 pointer arithmetic of map value goes out of range` */
+__xlated_unpriv("r0 -= r1")
+#endif
__naked void value_ptr_unknown_scalar_2_2(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
index c810f4f6f479..f345466bca68 100644
--- a/tools/testing/selftests/bpf/progs/verifier_var_off.c
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -114,8 +114,8 @@ __naked void stack_write_priv_vs_unpriv(void)
}
/* Similar to the previous test, but this time also perform a read from the
- * address written to with a variable offset. The read is allowed, showing that,
- * after a variable-offset write, a priviledged program can read the slots that
+ * address written to with a variable offset. The read is allowed, showing that,
+ * after a variable-offset write, a privileged program can read the slots that
* were in the range of that write (even if the verifier doesn't actually know if
 * the slot being read was really written to or not).
*
@@ -157,7 +157,7 @@ __naked void stack_write_followed_by_read(void)
SEC("socket")
__description("variable-offset stack write clobbers spilled regs")
__failure
-/* In the priviledged case, dereferencing a spilled-and-then-filled
+/* In the privileged case, dereferencing a spilled-and-then-filled
* register is rejected because the previous variable offset stack
* write might have overwritten the spilled pointer (i.e. we lose track
* of the spilled register when we analyze the write).
@@ -203,7 +203,7 @@ __naked void stack_write_clobbers_spilled_regs(void)
SEC("sockops")
__description("indirect variable-offset stack access, unbounded")
-__failure __msg("invalid unbounded variable-offset indirect access to stack R4")
+__failure __msg("invalid unbounded variable-offset write to stack R4")
__naked void variable_offset_stack_access_unbounded(void)
{
asm volatile (" \
@@ -236,7 +236,7 @@ l0_%=: r0 = 0; \
SEC("lwt_in")
__description("indirect variable-offset stack access, max out of bound")
-__failure __msg("invalid variable-offset indirect access to stack R2")
+__failure __msg("invalid variable-offset read from stack R2")
__naked void access_max_out_of_bound(void)
{
asm volatile (" \
@@ -269,7 +269,7 @@ __naked void access_max_out_of_bound(void)
*/
SEC("socket")
__description("indirect variable-offset stack access, zero-sized, max out of bound")
-__failure __msg("invalid variable-offset indirect access to stack R1")
+__failure __msg("invalid variable-offset write to stack R1")
__naked void zero_sized_access_max_out_of_bound(void)
{
asm volatile (" \
@@ -294,7 +294,7 @@ __naked void zero_sized_access_max_out_of_bound(void)
SEC("lwt_in")
__description("indirect variable-offset stack access, min out of bound")
-__failure __msg("invalid variable-offset indirect access to stack R2")
+__failure __msg("invalid variable-offset read from stack R2")
__naked void access_min_out_of_bound(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
index a7c0a553aa50..55398c04290a 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_accept.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -69,7 +70,7 @@ __success
int BPF_PROG(path_d_path_from_file_argument, struct file *file)
{
int ret;
- struct path *path;
+ const struct path *path;
/* The f_path member is a path which is embedded directly within a
* file. Therefore, a pointer to such embedded members are still
@@ -82,4 +83,21 @@ int BPF_PROG(path_d_path_from_file_argument, struct file *file)
return 0;
}
+SEC("lsm.s/inode_rename")
+__success
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ if (!inode)
+ return 0;
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
index d6d3f4fcb24c..4b392c6c8fc4 100644
--- a/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
+++ b/tools/testing/selftests/bpf/progs/verifier_vfs_reject.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2024 Google LLC. */
#include <vmlinux.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <linux/limits.h>
@@ -158,4 +159,18 @@ int BPF_PROG(path_d_path_kfunc_non_lsm, struct path *path, struct file *f)
return 0;
}
+SEC("lsm.s/inode_rename")
+__failure __msg("invalid mem access 'trusted_ptr_or_null_'")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct inode *inode = new_dentry->d_inode;
+ ino_t ino;
+
+ ino = inode->i_ino;
+ if (ino == 0)
+ return -EACCES;
+ return 0;
+}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
index f8d3ae0c29ae..25be2cd9d42c 100644
--- a/tools/testing/selftests/bpf/progs/wq.c
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -5,7 +5,7 @@
#include "bpf_experimental.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
@@ -187,3 +187,20 @@ long test_call_lru_sleepable(void *ctx)
return test_elem_callback(&lru, &key, wq_callback);
}
+
+SEC("tc")
+long test_map_no_btf(void *ctx)
+{
+ struct elem *val;
+ struct bpf_wq *wq;
+ int key = 42;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -2;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &array, 0) != 0)
+ return -3;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
index 25b51a72fe0f..d06f6d40594a 100644
--- a/tools/testing/selftests/bpf/progs/wq_failures.c
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -5,7 +5,7 @@
#include "bpf_experimental.h"
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-#include "../bpf_testmod/bpf_testmod_kfunc.h"
+#include "../test_kmods/bpf_testmod_kfunc.h"
char _license[] SEC("license") = "GPL";
@@ -142,3 +142,26 @@ long test_wrong_wq_pointer_offset(void *ctx)
return -22;
}
+
+SEC("tc")
+__log_level(2)
+__failure
+__msg(": (85) call bpf_wq_init#")
+__msg("R1 doesn't have constant offset. bpf_wq has to be at the constant offset")
+long test_bad_wq_off(void *ctx)
+{
+ struct elem *val;
+ struct bpf_wq *wq;
+ int key = 42;
+ u64 unknown;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -2;
+
+ unknown = bpf_get_prandom_u32();
+ wq = &val->w + unknown;
+ if (bpf_wq_init(wq, &array, 0) != 0)
+ return -3;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
index 31ca229bb3c0..09bb8a038d52 100644
--- a/tools/testing/selftests/bpf/progs/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -19,6 +19,13 @@ struct {
__type(value, __u32);
} prog_arr SEC(".maps");
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
__u64 *timestamp) __ksym;
extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
@@ -95,4 +102,10 @@ int rx(struct xdp_md *ctx)
return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
}
+SEC("xdp")
+int redirect(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&dev_map, ctx->rx_queue_index, XDP_PASS);
+}
+
char _license[] SEC("license") = "GPL";
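The redirect() program above uses ctx->rx_queue_index as the devmap key, and with max_entries set to 1 only queue 0 can match. A minimal userspace sketch for pointing that slot at an egress interface is shown below; the map-fd lookup, function name and ifindex handling are illustrative, not part of the patch.

	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	/* Point dev_map slot 0 at the interface packets should be forwarded to. */
	static int set_redirect_target(struct bpf_object *obj, int target_ifindex)
	{
		struct bpf_devmap_val val = { .ifindex = target_ifindex };
		__u32 key = 0;
		int map_fd = bpf_object__find_map_fd_by_name(obj, "dev_map");

		if (map_fd < 0)
			return map_fd;
		return bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
	}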
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
index 682dda8dabbc..50c8958f94e5 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_map.c
@@ -1,7 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
+#include <linux/if_ether.h>
+
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
struct {
__uint(type, BPF_MAP_TYPE_DEVMAP);
@@ -28,4 +31,89 @@ int xdp_redirect_map_2(struct xdp_md *xdp)
return bpf_redirect_map(&tx_port, 2, 0);
}
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+static int xdp_count(struct xdp_md *xdp, __u32 key)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u64 *count;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ if (bpf_htons(eth->h_proto) == ETH_P_IP) {
+ /* We only count IPv4 packets */
+ count = bpf_map_lookup_elem(&rxcnt, &key);
+ if (count)
+ *count += 1;
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_count_0(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 0);
+}
+
+SEC("xdp")
+int xdp_count_1(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 1);
+}
+
+SEC("xdp")
+int xdp_count_2(struct xdp_md *xdp)
+{
+ return xdp_count(xdp, 2);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __be64);
+} rx_mac SEC(".maps");
+
+static int store_mac(struct xdp_md *xdp, __u32 id)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ __u32 key = id;
+ __be64 mac = 0;
+
+ if (data + sizeof(*eth) > data_end)
+ return XDP_DROP;
+
+ /* Only store IPv4 MAC to avoid being polluted by IPv6 packets */
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ __builtin_memcpy(&mac, eth->h_source, ETH_ALEN);
+ bpf_map_update_elem(&rx_mac, &key, &mac, 0);
+ bpf_printk("%s - %x", __func__, mac);
+ }
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int store_mac_1(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 0);
+}
+
+SEC("xdp")
+int store_mac_2(struct xdp_md *xdp)
+{
+ return store_mac(xdp, 1);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 97b26a30b59a..bc2945ed8a80 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,6 +34,14 @@ struct {
__uint(max_entries, 128);
} mac_map SEC(".maps");
+/* Map to store redirect flags for each protocol */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u16);
+ __type(value, __u64);
+ __uint(max_entries, 16);
+} redirect_flags SEC(".maps");
+
SEC("xdp")
int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
{
@@ -41,25 +49,34 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
void *data = (void *)(long)ctx->data;
int if_index = ctx->ingress_ifindex;
struct ethhdr *eth = data;
+ __u64 *flags_from_map;
__u16 h_proto;
__u64 nh_off;
+ __u64 flags;
nh_off = sizeof(*eth);
if (data + nh_off > data_end)
return XDP_DROP;
- h_proto = eth->h_proto;
-
- /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
- if (h_proto == bpf_htons(ETH_P_IP))
- return bpf_redirect_map(&map_all, 0,
- BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
- /* Using IPv6 for none flag testing */
- else if (h_proto == bpf_htons(ETH_P_IPV6))
- return bpf_redirect_map(&map_all, if_index, 0);
- /* All others for BPF_F_BROADCAST testing */
- else
- return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+ h_proto = bpf_htons(eth->h_proto);
+
+ flags_from_map = bpf_map_lookup_elem(&redirect_flags, &h_proto);
+
+ /* Default flags for IPv4 : (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) */
+ if (h_proto == ETH_P_IP) {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
+ /* Default flags for IPv6 : 0 */
+ if (h_proto == ETH_P_IPV6) {
+ flags = flags_from_map ? *flags_from_map : 0;
+ return bpf_redirect_map(&map_all, if_index, flags);
+ }
+ /* Default flags for others : BPF_F_BROADCAST */
+ else {
+ flags = flags_from_map ? *flags_from_map : BPF_F_BROADCAST;
+ return bpf_redirect_map(&map_all, 0, flags);
+ }
}
/* The following 2 progs are for 2nd devmap prog testing */
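The redirect_flags map added above is keyed by the ethertype in host byte order (note the bpf_htons() applied to eth->h_proto before the lookup), so userspace can key it with plain ETH_P_* constants. A minimal sketch of overriding the IPv4 default follows; the map-fd lookup, function name and chosen flag value are illustrative.

	#include <linux/if_ether.h>
	#include <bpf/bpf.h>
	#include <bpf/libbpf.h>

	/* Make IPv4 broadcast without excluding the ingress interface. */
	static int set_ipv4_redirect_flags(struct bpf_object *obj)
	{
		int map_fd = bpf_object__find_map_fd_by_name(obj, "redirect_flags");
		__u16 proto = ETH_P_IP;	/* host byte order, matching the BPF-side key */
		__u64 flags = BPF_F_BROADCAST;

		if (map_fd < 0)
			return map_fd;
		return bpf_map_update_elem(map_fd, &proto, &flags, BPF_ANY);
	}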
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index ccde6a4c6319..683306db8594 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -4,6 +4,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
#include "xsk_xdp_common.h"
struct {
@@ -14,6 +16,7 @@ struct {
} xsk SEC(".maps");
static unsigned int idx;
+int adjust_value = 0;
int count = 0;
SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
@@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, idx, XDP_DROP);
}
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+ __u32 buff_len, curr_buff_len;
+ int ret;
+
+ buff_len = bpf_xdp_get_buff_len(xdp);
+ if (buff_len == 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+ if (ret < 0) {
+ /* Handle unsupported cases */
+ if (ret == -EOPNOTSUPP) {
+ /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+ * is unsupported
+ */
+ adjust_value = -EOPNOTSUPP;
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+ }
+
+ return XDP_DROP;
+ }
+
+ curr_buff_len = bpf_xdp_get_buff_len(xdp);
+ if (curr_buff_len != buff_len + adjust_value)
+ return XDP_DROP;
+
+ if (curr_buff_len > buff_len) {
+ __u32 *pkt_data = (void *)(long)xdp->data;
+ __u32 len, words_to_end, seq_num;
+
+ len = curr_buff_len - PKT_HDR_ALIGN;
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ seq_num = words_to_end;
+
+ /* Convert sequence number to network byte order. Store this in the last 4 bytes of
+ * the packet. Use 'adjust_value' to determine the position at the end of the
+ * packet for storing the sequence number.
+ */
+ seq_num = __constant_htonl(words_to_end);
+ bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+ sizeof(seq_num));
+ }
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
char _license[] SEC("license") = "GPL";
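The adjust_value global above is used in both directions: userspace writes the number of bytes to grow or shrink each frame, and the program writes back -EOPNOTSUPP when bpf_xdp_adjust_tail() cannot honour the request. A hedged sketch of the userspace side, assuming the generated xsk_xdp_progs.skel.h skeleton and its conventional bss accessor; the helper names are illustrative.

	#include <errno.h>
	#include <stdbool.h>
	#include "xsk_xdp_progs.skel.h"	/* generated skeleton; name assumed */

	/* Request that each redirected frame be grown (delta > 0) or shrunk
	 * (delta < 0) by 'delta' bytes before it reaches the AF_XDP socket.
	 */
	static void request_tail_adjust(struct xsk_xdp_progs *skel, int delta)
	{
		skel->bss->adjust_value = delta;
	}

	/* After traffic has run, check whether the program reported the
	 * adjustment as unsupported for this driver or frame layout.
	 */
	static bool tail_adjust_unsupported(struct xsk_xdp_progs *skel)
	{
		return skel->bss->adjust_value == -EOPNOTSUPP;
	}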
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
index ca0162b4dc57..1fcfa5160231 100644
--- a/tools/testing/selftests/bpf/sdt.h
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -102,6 +102,8 @@
# define STAP_SDT_ARG_CONSTRAINT nZr
# elif defined __arm__
# define STAP_SDT_ARG_CONSTRAINT g
+# elif defined __loongarch__
+# define STAP_SDT_ARG_CONSTRAINT nmr
# else
# define STAP_SDT_ARG_CONSTRAINT nor
# endif
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 1453a53ed547..b03a87571592 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -90,10 +90,6 @@ echo -e "... through kbuild\n"
if [ -f ".config" ] ; then
make_and_clean tools/bpf
- ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for
- ## runqslower, but the default (used for the "clean" target) is .output.
- ## Let's make sure we clean runqslower's directory properly.
- make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean
## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
## down from toplevel Makefile to bpftool's Makefile.
diff --git a/tools/testing/selftests/bpf/test_bpftool_map.sh b/tools/testing/selftests/bpf/test_bpftool_map.sh
new file mode 100755
index 000000000000..515b1df0501e
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_map.sh
@@ -0,0 +1,398 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME="bpftool_map"
+BPF_FILE="security_bpf_map.bpf.o"
+BPF_ITER_FILE="bpf_iter_map_elem.bpf.o"
+PROTECTED_MAP_NAME="prot_map"
+NOT_PROTECTED_MAP_NAME="not_prot_map"
+BPF_FS_TMP_PARENT="/tmp"
+BPF_FS_PARENT=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_FS_PARENT=${BPF_FS_PARENT:-$BPF_FS_TMP_PARENT}
+# bpftool will mount bpf file system under BPF_DIR if it is not mounted
+# under BPF_FS_PARENT.
+BPF_DIR="$BPF_FS_PARENT/test_$TESTNAME"
+SCRIPT_DIR=$(dirname $(realpath "$0"))
+BPF_FILE_PATH="$SCRIPT_DIR/$BPF_FILE"
+BPF_ITER_FILE_PATH="$SCRIPT_DIR/$BPF_ITER_FILE"
+BPFTOOL_PATH="bpftool"
+# Assume the script is located under tools/testing/selftests/bpf/
+KDIR_ROOT_DIR=$(realpath "$SCRIPT_DIR"/../../../../)
+
+_cleanup()
+{
+ set +eu
+
+ # If BPF_DIR is a mount point this will not remove the mount point itself.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2> /dev/null
+
+ # Unmount if BPF filesystem was temporarily created.
+ if [ "$BPF_FS_PARENT" = "$BPF_FS_TMP_PARENT" ]; then
+ # A loop and recursive unmount are required as bpftool might
+ # create multiple mounts. For example, a bind mount of the directory
+ # to itself. The bind mount is created to change mount propagation
+ # flags on an actual mount point.
+ max_attempts=3
+ attempt=0
+ while mountpoint -q "$BPF_DIR" && [ $attempt -lt $max_attempts ]; do
+ umount -R "$BPF_DIR" 2>/dev/null
+ attempt=$((attempt+1))
+ done
+
+ # The directory still exists. Remove it now.
+ [ -d "$BPF_DIR" ] && rm -rf "$BPF_DIR" 2>/dev/null
+ fi
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+check_root_privileges() {
+ if [ $(id -u) -ne 0 ]; then
+ echo "Need root privileges"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify bpftool path.
+# Parameters:
+# $1: bpftool path
+verify_bpftool_path() {
+ local bpftool_path="$1"
+ if ! "$bpftool_path" version > /dev/null 2>&1; then
+ echo "Could not run test without bpftool"
+ exit $ksft_skip
+ fi
+}
+
+# Function to verify BTF support.
+# The test requires BTF support for fmod_ret programs.
+verify_btf_support() {
+ if [ ! -f /sys/kernel/btf/vmlinux ]; then
+ echo "Could not run test without BTF support"
+ exit $ksft_skip
+ fi
+}
+
+# Function to initialize map entries with keys [0..2]; each value carries the key in its last byte.
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+initialize_map_entries() {
+ local map_name="$1"
+ local bpftool_path="$2"
+
+ for key in 0 1 2; do
+ "$bpftool_path" map update name "$map_name" key $key 0 0 0 value 0 0 0 $key
+ done
+}
+
+# Test read access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+access_for_read() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+
+ # Test read access to the map.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Read access to $key in $map_name failed"
+ exit 1
+ fi
+
+ # Test read access to map's BTF data.
+ if ! "$bpftool_path" btf dump map "$name_cmd" "$map_name" 1>/dev/null; then
+ echo " Read access to $map_name for BTF data failed"
+ exit 1
+ fi
+}
+
+# Test write access to the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_write() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test entry deletion for the map.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key
+# $5: Whether write should succeed (true/false)
+access_for_deletion() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key="$4"
+ local write_should_succeed="$5"
+ local value="1 1 1 1"
+
+ # Test deletion by key for the map.
+ # Before deleting, check the key exists.
+ if ! "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ echo " Key $key does not exist in $map_name"
+ exit 1
+ fi
+
+ # Delete by key.
+ if "$bpftool_path" map delete "$name_cmd" "$map_name" key $key 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Deletion for $key in $map_name succeeded but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Deletion for $key in $map_name failed but should have succeeded"
+ exit 1
+ fi
+ fi
+
+ # After deleting, check the entry existence according to the expected status.
+ if "$bpftool_path" map lookup "$name_cmd" "$map_name" key $key 1>/dev/null; then
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Key $key for $map_name was not deleted but should have been deleted"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "false" ]; then
+ echo "Key $key for $map_name was deleted but should have not been deleted"
+ exit 1
+ fi
+ fi
+
+ # Test creation of map's deleted entry, if deletion was successful.
+ # Otherwise, the entry exists.
+ if "$bpftool_path" map update "$name_cmd" "$map_name" key $key value \
+ $value 2>/dev/null; then
+ if [ "$write_should_succeed" = "false" ]; then
+ echo " Write access to $key in $map_name succeeded after deletion attempt but should have failed"
+ exit 1
+ fi
+ else
+ if [ "$write_should_succeed" = "true" ]; then
+ echo " Write access to $key in $map_name failed after deletion attempt but should have succeeded"
+ exit 1
+ fi
+ fi
+}
+
+# Test map elements iterator.
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: BPF_DIR
+# $5: bpf iterator object file path
+iterate_map_elem() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local bpf_dir="$4"
+ local bpf_file="$5"
+ local pin_path="$bpf_dir/map_iterator"
+
+ "$bpftool_path" iter pin "$bpf_file" "$pin_path" map "$name_cmd" "$map_name"
+ if [ ! -f "$pin_path" ]; then
+ echo " Failed to pin iterator to $pin_path"
+ exit 1
+ fi
+
+ cat "$pin_path" 1>/dev/null
+ rm "$pin_path" 2>/dev/null
+}
+
+# Function to test map access with configurable write expectations
+# Parameters:
+# $1: Name command (name/pinned)
+# $2: Map name
+# $3: bpftool path
+# $4: key for rw
+# $5: key to delete
+# $6: Whether write should succeed (true/false)
+# $7: BPF_DIR
+# $8: bpf iterator object file path
+access_map() {
+ local name_cmd="$1"
+ local map_name="$2"
+ local bpftool_path="$3"
+ local key_for_rw="$4"
+ local key_to_del="$5"
+ local write_should_succeed="$6"
+ local bpf_dir="$7"
+ local bpf_iter_file_path="$8"
+
+ access_for_read "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw"
+ access_for_write "$name_cmd" "$map_name" "$bpftool_path" "$key_for_rw" \
+ "$write_should_succeed"
+ access_for_deletion "$name_cmd" "$map_name" "$bpftool_path" "$key_to_del" \
+ "$write_should_succeed"
+ iterate_map_elem "$name_cmd" "$map_name" "$bpftool_path" "$bpf_dir" \
+ "$bpf_iter_file_path"
+}
+
+# Function to test map access by name and by pinned path, with configurable write expectations
+# Parameters:
+# $1: Map name
+# $2: bpftool path
+# $3: BPF_DIR
+# $4: Whether write should succeed (true/false)
+# $5: bpf iterator object file path
+test_map_access() {
+ local map_name="$1"
+ local bpftool_path="$2"
+ local bpf_dir="$3"
+ local pin_path="$bpf_dir/${map_name}_pinned"
+ local write_should_succeed="$4"
+ local bpf_iter_file_path="$5"
+
+ # Test access to the map by name.
+ access_map "name" "$map_name" "$bpftool_path" "0 0 0 0" "1 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+
+ # Pin the map to the BPF filesystem
+ "$bpftool_path" map pin name "$map_name" "$pin_path"
+ if [ ! -e "$pin_path" ]; then
+ echo " Failed to pin $map_name"
+ exit 1
+ fi
+
+ # Test access to the pinned map.
+ access_map "pinned" "$pin_path" "$bpftool_path" "0 0 0 0" "2 0 0 0" \
+ "$write_should_succeed" "$bpf_dir" "$bpf_iter_file_path"
+}
+
+# Function to test map creation and map-of-maps
+# Parameters:
+# $1: bpftool path
+# $2: BPF_DIR
+test_map_creation_and_map_of_maps() {
+ local bpftool_path="$1"
+ local bpf_dir="$2"
+ local outer_map_name="outer_map_tt"
+ local inner_map_name="inner_map_tt"
+
+ "$bpftool_path" map create "$bpf_dir/$inner_map_name" type array key 4 \
+ value 4 entries 4 name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$inner_map_name" ]; then
+ echo " Failed to create inner map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ "$bpftool_path" map create "$bpf_dir/$outer_map_name" type hash_of_maps \
+ key 4 value 4 entries 2 name "$outer_map_name" inner_map name "$inner_map_name"
+ if [ ! -f "$bpf_dir/$outer_map_name" ]; then
+ echo " Failed to create outer map file at $bpf_dir/$outer_map_name"
+ return 1
+ fi
+
+ # Add entries to the outer map by name and by pinned path.
+ "$bpftool_path" map update pinned "$bpf_dir/$outer_map_name" key 0 0 0 0 \
+ value pinned "$bpf_dir/$inner_map_name"
+ "$bpftool_path" map update name "$outer_map_name" key 1 0 0 0 value \
+ name "$inner_map_name"
+
+ # The outer map should be full by now.
+ # The following map update command is expected to fail.
+ if "$bpftool_path" map update name "$outer_map_name" key 2 0 0 0 value name \
+ "$inner_map_name" 2>/dev/null; then
+ echo " Update for $outer_map_name succeeded but should have failed"
+ exit 1
+ fi
+}
+
+# Function to test map access with the btf list command
+# Parameters:
+# $1: bpftool path
+test_map_access_with_btf_list() {
+ local bpftool_path="$1"
+
+ # The btf list command iterates over maps for
+ # loaded BPF programs.
+ if ! "$bpftool_path" btf list 1>/dev/null; then
+ echo " Failed to access btf data"
+ exit 1
+ fi
+}
+
+set -eu
+
+trap cleanup_skip EXIT
+
+check_root_privileges
+
+verify_bpftool_path "$BPFTOOL_PATH"
+
+verify_btf_support
+
+trap cleanup EXIT
+
+# Load and attach the BPF programs to control maps access.
+"$BPFTOOL_PATH" prog loadall "$BPF_FILE_PATH" "$BPF_DIR" autoattach
+
+initialize_map_entries "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+initialize_map_entries "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH"
+
+# Activate the map protection mechanism. Protection status is controlled
+# by a value stored in the prot_status_map at index 0.
+"$BPFTOOL_PATH" map update name prot_status_map key 0 0 0 0 value 1 0 0 0
+
+# Test protected map (write should fail).
+test_map_access "$PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "false" \
+ "$BPF_ITER_FILE_PATH"
+
+# Test not protected map (write should succeed).
+test_map_access "$NOT_PROTECTED_MAP_NAME" "$BPFTOOL_PATH" "$BPF_DIR" "true" \
+ "$BPF_ITER_FILE_PATH"
+
+test_map_creation_and_map_of_maps "$BPFTOOL_PATH" "$BPF_DIR"
+
+test_map_access_with_btf_list "$BPFTOOL_PATH"
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
index 0ed67b6b31dd..238121fda5b6 100755
--- a/tools/testing/selftests/bpf/test_bpftool_synctypes.py
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -66,7 +66,7 @@ class ArrayParser(BlockParser):
def __init__(self, reader, array_name):
self.array_name = array_name
- self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n')
+ self.start_marker = re.compile(fr'(static )?const bool {self.array_name}\[.*\] = {{\n')
super().__init__(reader)
def search_block(self):
@@ -80,7 +80,7 @@ class ArrayParser(BlockParser):
Parse a block and return data as a dictionary. Items to extract must be
on separate lines in the file.
"""
- pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$')
+ pattern = re.compile(r'\[(BPF_\w*)\]\s*= (true|false),?$')
entries = set()
while True:
line = self.reader.readline()
@@ -178,7 +178,7 @@ class FileExtractor(object):
@enum_name: name of the enum to parse
"""
start_marker = re.compile(f'enum {enum_name} {{\n')
- pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
+ pattern = re.compile(r'^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
end_marker = re.compile('^};')
parser = BlockParser(self.reader)
parser.search_block(start_marker)
@@ -226,8 +226,8 @@ class FileExtractor(object):
@block_name: name of the block to parse, 'TYPE' in the example
"""
- start_marker = re.compile(f'\*{block_name}\* := {{')
- pattern = re.compile('\*\*([\w/-]+)\*\*')
+ start_marker = re.compile(fr'\*{block_name}\* := {{')
+ pattern = re.compile(r'\*\*([\w/-]+)\*\*')
end_marker = re.compile('}\n')
return self.__get_description_list(start_marker, pattern, end_marker)
@@ -245,8 +245,8 @@ class FileExtractor(object):
@block_name: name of the block to parse, 'TYPE' in the example
"""
- start_marker = re.compile(f'"\s*{block_name} := {{')
- pattern = re.compile('([\w/]+) [|}]')
+ start_marker = re.compile(fr'"\s*{block_name} := {{')
+ pattern = re.compile(r'([\w/]+) [|}]')
end_marker = re.compile('}')
return self.__get_description_list(start_marker, pattern, end_marker)
@@ -264,8 +264,8 @@ class FileExtractor(object):
@macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
"""
- start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
- pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+ start_marker = re.compile(fr'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]])')
end_marker = re.compile('}\\\\n')
return self.__get_description_list(start_marker, pattern, end_marker)
@@ -283,8 +283,8 @@ class FileExtractor(object):
@block_name: name of the block to parse, 'TYPE' in the example
"""
- start_marker = re.compile(f'local {block_name}=\'')
- pattern = re.compile('(?:.*=\')?([\w/]+)')
+ start_marker = re.compile(fr'local {block_name}=\'')
+ pattern = re.compile(r'(?:.*=\')?([\w/]+)')
end_marker = re.compile('\'$')
return self.__get_description_list(start_marker, pattern, end_marker)
@@ -316,7 +316,7 @@ class MainHeaderFileExtractor(SourceFileExtractor):
{'-p', '-d', '--pretty', '--debug', '--json', '-j'}
"""
start_marker = re.compile(f'"OPTIONS :=')
- pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])')
+ pattern = re.compile(r'([\w-]+) ?(?:\||}[ }\]"])')
end_marker = re.compile('#define')
parser = InlineListParser(self.reader)
@@ -338,8 +338,8 @@ class ManSubstitutionsExtractor(SourceFileExtractor):
{'-p', '-d', '--pretty', '--debug', '--json', '-j'}
"""
- start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {')
- pattern = re.compile('\*\*([\w/-]+)\*\*')
+ start_marker = re.compile(r'\|COMMON_OPTIONS\| replace:: {')
+ pattern = re.compile(r'\*\*([\w/-]+)\*\*')
end_marker = re.compile('}$')
parser = InlineListParser(self.reader)
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index fb4f4714eeb4..e65889ab4adf 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -72,9 +72,15 @@
#define BTF_TYPE_FLOAT_ENC(name, sz) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+#define BTF_DECL_ATTR_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), type), (component_idx)
+
#define BTF_DECL_TAG_ENC(value, type, component_idx) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+#define BTF_TYPE_ATTR_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 1, 0), type)
+
#define BTF_TYPE_TAG_ENC(value, type) \
BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
deleted file mode 100644
index 571cc076dd7d..000000000000
--- a/tools/testing/selftests/bpf/test_flow_dissector.c
+++ /dev/null
@@ -1,780 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Inject packets with all sorts of encapsulation into the kernel.
- *
- * IPv4/IPv6 outer layer 3
- * GRE/GUE/BARE outer layer 4, where bare is IPIP/SIT/IPv4-in-IPv6/..
- * IPv4/IPv6 inner layer 3
- */
-
-#define _GNU_SOURCE
-
-#include <stddef.h>
-#include <arpa/inet.h>
-#include <asm/byteorder.h>
-#include <error.h>
-#include <errno.h>
-#include <linux/if_packet.h>
-#include <linux/if_ether.h>
-#include <linux/ipv6.h>
-#include <netinet/ip.h>
-#include <netinet/in.h>
-#include <netinet/udp.h>
-#include <poll.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/stat.h>
-#include <sys/time.h>
-#include <sys/types.h>
-#include <unistd.h>
-
-#define CFG_PORT_INNER 8000
-
-/* Add some protocol definitions that do not exist in userspace */
-
-struct grehdr {
- uint16_t unused;
- uint16_t protocol;
-} __attribute__((packed));
-
-struct guehdr {
- union {
- struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
- __u8 hlen:5,
- control:1,
- version:2;
-#elif defined (__BIG_ENDIAN_BITFIELD)
- __u8 version:2,
- control:1,
- hlen:5;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
- __u8 proto_ctype;
- __be16 flags;
- };
- __be32 word;
- };
-};
-
-static uint8_t cfg_dsfield_inner;
-static uint8_t cfg_dsfield_outer;
-static uint8_t cfg_encap_proto;
-static bool cfg_expect_failure = false;
-static int cfg_l3_extra = AF_UNSPEC; /* optional SIT prefix */
-static int cfg_l3_inner = AF_UNSPEC;
-static int cfg_l3_outer = AF_UNSPEC;
-static int cfg_num_pkt = 10;
-static int cfg_num_secs = 0;
-static char cfg_payload_char = 'a';
-static int cfg_payload_len = 100;
-static int cfg_port_gue = 6080;
-static bool cfg_only_rx;
-static bool cfg_only_tx;
-static int cfg_src_port = 9;
-
-static char buf[ETH_DATA_LEN];
-
-#define INIT_ADDR4(name, addr4, port) \
- static struct sockaddr_in name = { \
- .sin_family = AF_INET, \
- .sin_port = __constant_htons(port), \
- .sin_addr.s_addr = __constant_htonl(addr4), \
- };
-
-#define INIT_ADDR6(name, addr6, port) \
- static struct sockaddr_in6 name = { \
- .sin6_family = AF_INET6, \
- .sin6_port = __constant_htons(port), \
- .sin6_addr = addr6, \
- };
-
-INIT_ADDR4(in_daddr4, INADDR_LOOPBACK, CFG_PORT_INNER)
-INIT_ADDR4(in_saddr4, INADDR_LOOPBACK + 2, 0)
-INIT_ADDR4(out_daddr4, INADDR_LOOPBACK, 0)
-INIT_ADDR4(out_saddr4, INADDR_LOOPBACK + 1, 0)
-INIT_ADDR4(extra_daddr4, INADDR_LOOPBACK, 0)
-INIT_ADDR4(extra_saddr4, INADDR_LOOPBACK + 1, 0)
-
-INIT_ADDR6(in_daddr6, IN6ADDR_LOOPBACK_INIT, CFG_PORT_INNER)
-INIT_ADDR6(in_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(out_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(out_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(extra_daddr6, IN6ADDR_LOOPBACK_INIT, 0)
-INIT_ADDR6(extra_saddr6, IN6ADDR_LOOPBACK_INIT, 0)
-
-static unsigned long util_gettime(void)
-{
- struct timeval tv;
-
- gettimeofday(&tv, NULL);
- return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
-}
-
-static void util_printaddr(const char *msg, struct sockaddr *addr)
-{
- unsigned long off = 0;
- char nbuf[INET6_ADDRSTRLEN];
-
- switch (addr->sa_family) {
- case PF_INET:
- off = __builtin_offsetof(struct sockaddr_in, sin_addr);
- break;
- case PF_INET6:
- off = __builtin_offsetof(struct sockaddr_in6, sin6_addr);
- break;
- default:
- error(1, 0, "printaddr: unsupported family %u\n",
- addr->sa_family);
- }
-
- if (!inet_ntop(addr->sa_family, ((void *) addr) + off, nbuf,
- sizeof(nbuf)))
- error(1, errno, "inet_ntop");
-
- fprintf(stderr, "%s: %s\n", msg, nbuf);
-}
-
-static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
-{
- unsigned long sum = 0;
- int i;
-
- for (i = 0; i < num_u16; i++)
- sum += start[i];
-
- return sum;
-}
-
-static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
- unsigned long sum)
-{
- sum += add_csum_hword(start, num_u16);
-
- while (sum >> 16)
- sum = (sum & 0xffff) + (sum >> 16);
-
- return ~sum;
-}
-
-static void build_ipv4_header(void *header, uint8_t proto,
- uint32_t src, uint32_t dst,
- int payload_len, uint8_t tos)
-{
- struct iphdr *iph = header;
-
- iph->ihl = 5;
- iph->version = 4;
- iph->tos = tos;
- iph->ttl = 8;
- iph->tot_len = htons(sizeof(*iph) + payload_len);
- iph->id = htons(1337);
- iph->protocol = proto;
- iph->saddr = src;
- iph->daddr = dst;
- iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
-}
-
-static void ipv6_set_dsfield(struct ipv6hdr *ip6h, uint8_t dsfield)
-{
- uint16_t val, *ptr = (uint16_t *)ip6h;
-
- val = ntohs(*ptr);
- val &= 0xF00F;
- val |= ((uint16_t) dsfield) << 4;
- *ptr = htons(val);
-}
-
-static void build_ipv6_header(void *header, uint8_t proto,
- struct sockaddr_in6 *src,
- struct sockaddr_in6 *dst,
- int payload_len, uint8_t dsfield)
-{
- struct ipv6hdr *ip6h = header;
-
- ip6h->version = 6;
- ip6h->payload_len = htons(payload_len);
- ip6h->nexthdr = proto;
- ip6h->hop_limit = 8;
- ipv6_set_dsfield(ip6h, dsfield);
-
- memcpy(&ip6h->saddr, &src->sin6_addr, sizeof(ip6h->saddr));
- memcpy(&ip6h->daddr, &dst->sin6_addr, sizeof(ip6h->daddr));
-}
-
-static uint16_t build_udp_v4_csum(const struct iphdr *iph,
- const struct udphdr *udph,
- int num_words)
-{
- unsigned long pseudo_sum;
- int num_u16 = sizeof(iph->saddr); /* saddr + daddr: 2 * sizeof(saddr) bytes == sizeof(saddr) halfwords */
-
- pseudo_sum = add_csum_hword((void *) &iph->saddr, num_u16);
- pseudo_sum += htons(IPPROTO_UDP);
- pseudo_sum += udph->len;
- return build_ip_csum((void *) udph, num_words, pseudo_sum);
-}
-
-static uint16_t build_udp_v6_csum(const struct ipv6hdr *ip6h,
- const struct udphdr *udph,
- int num_words)
-{
- unsigned long pseudo_sum;
- int num_u16 = sizeof(ip6h->saddr); /* saddr + daddr: 2 * sizeof(saddr) bytes == sizeof(saddr) halfwords */
-
- pseudo_sum = add_csum_hword((void *) &ip6h->saddr, num_u16);
- pseudo_sum += htons(ip6h->nexthdr);
- pseudo_sum += ip6h->payload_len;
- return build_ip_csum((void *) udph, num_words, pseudo_sum);
-}
-
-static void build_udp_header(void *header, int payload_len,
- uint16_t dport, int family)
-{
- struct udphdr *udph = header;
- int len = sizeof(*udph) + payload_len;
-
- udph->source = htons(cfg_src_port);
- udph->dest = htons(dport);
- udph->len = htons(len);
- udph->check = 0;
- if (family == AF_INET)
- udph->check = build_udp_v4_csum(header - sizeof(struct iphdr),
- udph, len >> 1);
- else
- udph->check = build_udp_v6_csum(header - sizeof(struct ipv6hdr),
- udph, len >> 1);
-}
-
-static void build_gue_header(void *header, uint8_t proto)
-{
- struct guehdr *gueh = header;
-
- gueh->proto_ctype = proto;
-}
-
-static void build_gre_header(void *header, uint16_t proto)
-{
- struct grehdr *greh = header;
-
- greh->protocol = htons(proto);
-}
-
-static int l3_length(int family)
-{
- if (family == AF_INET)
- return sizeof(struct iphdr);
- else
- return sizeof(struct ipv6hdr);
-}
-
-static int build_packet(void)
-{
- int ol3_len = 0, ol4_len = 0, il3_len = 0, il4_len = 0;
- int el3_len = 0;
-
- if (cfg_l3_extra)
- el3_len = l3_length(cfg_l3_extra);
-
- /* calculate header offsets */
- if (cfg_encap_proto) {
- ol3_len = l3_length(cfg_l3_outer);
-
- if (cfg_encap_proto == IPPROTO_GRE)
- ol4_len = sizeof(struct grehdr);
- else if (cfg_encap_proto == IPPROTO_UDP)
- ol4_len = sizeof(struct udphdr) + sizeof(struct guehdr);
- }
-
- il3_len = l3_length(cfg_l3_inner);
- il4_len = sizeof(struct udphdr);
-
- if (el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len >=
- sizeof(buf))
- error(1, 0, "packet too large\n");
-
- /*
- * Fill packet from inside out, to calculate correct checksums.
- * But create ip before udp headers, as udp uses ip for pseudo-sum.
- */
- memset(buf + el3_len + ol3_len + ol4_len + il3_len + il4_len,
- cfg_payload_char, cfg_payload_len);
-
- /* add zero byte for udp csum padding */
- buf[el3_len + ol3_len + ol4_len + il3_len + il4_len + cfg_payload_len] = 0;
-
- switch (cfg_l3_inner) {
- case PF_INET:
- build_ipv4_header(buf + el3_len + ol3_len + ol4_len,
- IPPROTO_UDP,
- in_saddr4.sin_addr.s_addr,
- in_daddr4.sin_addr.s_addr,
- il4_len + cfg_payload_len,
- cfg_dsfield_inner);
- break;
- case PF_INET6:
- build_ipv6_header(buf + el3_len + ol3_len + ol4_len,
- IPPROTO_UDP,
- &in_saddr6, &in_daddr6,
- il4_len + cfg_payload_len,
- cfg_dsfield_inner);
- break;
- }
-
- build_udp_header(buf + el3_len + ol3_len + ol4_len + il3_len,
- cfg_payload_len, CFG_PORT_INNER, cfg_l3_inner);
-
- if (!cfg_encap_proto)
- return il3_len + il4_len + cfg_payload_len;
-
- switch (cfg_l3_outer) {
- case PF_INET:
- build_ipv4_header(buf + el3_len, cfg_encap_proto,
- out_saddr4.sin_addr.s_addr,
- out_daddr4.sin_addr.s_addr,
- ol4_len + il3_len + il4_len + cfg_payload_len,
- cfg_dsfield_outer);
- break;
- case PF_INET6:
- build_ipv6_header(buf + el3_len, cfg_encap_proto,
- &out_saddr6, &out_daddr6,
- ol4_len + il3_len + il4_len + cfg_payload_len,
- cfg_dsfield_outer);
- break;
- }
-
- switch (cfg_encap_proto) {
- case IPPROTO_UDP:
- build_gue_header(buf + el3_len + ol3_len + ol4_len -
- sizeof(struct guehdr),
- cfg_l3_inner == PF_INET ? IPPROTO_IPIP
- : IPPROTO_IPV6);
- build_udp_header(buf + el3_len + ol3_len,
- sizeof(struct guehdr) + il3_len + il4_len +
- cfg_payload_len,
- cfg_port_gue, cfg_l3_outer);
- break;
- case IPPROTO_GRE:
- build_gre_header(buf + el3_len + ol3_len,
- cfg_l3_inner == PF_INET ? ETH_P_IP
- : ETH_P_IPV6);
- break;
- }
-
- switch (cfg_l3_extra) {
- case PF_INET:
- build_ipv4_header(buf,
- cfg_l3_outer == PF_INET ? IPPROTO_IPIP
- : IPPROTO_IPV6,
- extra_saddr4.sin_addr.s_addr,
- extra_daddr4.sin_addr.s_addr,
- ol3_len + ol4_len + il3_len + il4_len +
- cfg_payload_len, 0);
- break;
- case PF_INET6:
- build_ipv6_header(buf,
- cfg_l3_outer == PF_INET ? IPPROTO_IPIP
- : IPPROTO_IPV6,
- &extra_saddr6, &extra_daddr6,
- ol3_len + ol4_len + il3_len + il4_len +
- cfg_payload_len, 0);
- break;
- }
-
- return el3_len + ol3_len + ol4_len + il3_len + il4_len +
- cfg_payload_len;
-}
-
-/* sender transmits encapsulated over RAW or unencap'd over UDP */
-static int setup_tx(void)
-{
- int family, fd, ret;
-
- if (cfg_l3_extra)
- family = cfg_l3_extra;
- else if (cfg_l3_outer)
- family = cfg_l3_outer;
- else
- family = cfg_l3_inner;
-
- fd = socket(family, SOCK_RAW, IPPROTO_RAW);
- if (fd == -1)
- error(1, errno, "socket tx");
-
- if (cfg_l3_extra) {
- if (cfg_l3_extra == PF_INET)
- ret = connect(fd, (void *) &extra_daddr4,
- sizeof(extra_daddr4));
- else
- ret = connect(fd, (void *) &extra_daddr6,
- sizeof(extra_daddr6));
- if (ret)
- error(1, errno, "connect tx");
- } else if (cfg_l3_outer) {
- /* connect to destination if not encapsulated */
- if (cfg_l3_outer == PF_INET)
- ret = connect(fd, (void *) &out_daddr4,
- sizeof(out_daddr4));
- else
- ret = connect(fd, (void *) &out_daddr6,
- sizeof(out_daddr6));
- if (ret)
- error(1, errno, "connect tx");
- } else {
- /* otherwise using loopback */
- if (cfg_l3_inner == PF_INET)
- ret = connect(fd, (void *) &in_daddr4,
- sizeof(in_daddr4));
- else
- ret = connect(fd, (void *) &in_daddr6,
- sizeof(in_daddr6));
- if (ret)
- error(1, errno, "connect tx");
- }
-
- return fd;
-}
-
-/* receiver reads unencapsulated UDP */
-static int setup_rx(void)
-{
- int fd, ret;
-
- fd = socket(cfg_l3_inner, SOCK_DGRAM, 0);
- if (fd == -1)
- error(1, errno, "socket rx");
-
- if (cfg_l3_inner == PF_INET)
- ret = bind(fd, (void *) &in_daddr4, sizeof(in_daddr4));
- else
- ret = bind(fd, (void *) &in_daddr6, sizeof(in_daddr6));
- if (ret)
- error(1, errno, "bind rx");
-
- return fd;
-}
-
-static int do_tx(int fd, const char *pkt, int len)
-{
- int ret;
-
- ret = write(fd, pkt, len);
- if (ret == -1)
- error(1, errno, "send");
- if (ret != len)
- error(1, errno, "send: len (%d < %d)\n", ret, len);
-
- return 1;
-}
-
-static int do_poll(int fd, short events, int timeout)
-{
- struct pollfd pfd;
- int ret;
-
- pfd.fd = fd;
- pfd.events = events;
-
- ret = poll(&pfd, 1, timeout);
- if (ret == -1)
- error(1, errno, "poll");
- if (ret && !(pfd.revents & POLLIN))
- error(1, errno, "poll: unexpected event 0x%x\n", pfd.revents);
-
- return ret;
-}
-
-static int do_rx(int fd)
-{
- char rbuf;
- int ret, num = 0;
-
- while (1) {
- ret = recv(fd, &rbuf, 1, MSG_DONTWAIT);
- if (ret == -1 && errno == EAGAIN)
- break;
- if (ret == -1)
- error(1, errno, "recv");
- if (rbuf != cfg_payload_char)
- error(1, 0, "recv: payload mismatch");
- num++;
- }
-
- return num;
-}
-
-static int do_main(void)
-{
- unsigned long tstop, treport, tcur;
- int fdt = -1, fdr = -1, len, tx = 0, rx = 0;
-
- if (!cfg_only_tx)
- fdr = setup_rx();
- if (!cfg_only_rx)
- fdt = setup_tx();
-
- len = build_packet();
-
- tcur = util_gettime();
- treport = tcur + 1000;
- tstop = tcur + (cfg_num_secs * 1000);
-
- while (1) {
- if (!cfg_only_rx)
- tx += do_tx(fdt, buf, len);
-
- if (!cfg_only_tx)
- rx += do_rx(fdr);
-
- if (cfg_num_secs) {
- tcur = util_gettime();
- if (tcur >= tstop)
- break;
- if (tcur >= treport) {
- fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
- tx = 0;
- rx = 0;
- treport = tcur + 1000;
- }
- } else {
- if (tx == cfg_num_pkt)
- break;
- }
- }
-
- /* read straggler packets, if any */
- if (rx < tx) {
- tstop = util_gettime() + 100;
- while (rx < tx) {
- tcur = util_gettime();
- if (tcur >= tstop)
- break;
-
- do_poll(fdr, POLLIN, tstop - tcur);
- rx += do_rx(fdr);
- }
- }
-
- fprintf(stderr, "pkts: tx=%u rx=%u\n", tx, rx);
-
- if (fdr != -1 && close(fdr))
- error(1, errno, "close rx");
- if (fdt != -1 && close(fdt))
- error(1, errno, "close tx");
-
- /*
- * success (== 0) only if received all packets
- * unless failure is expected, in which case none must arrive.
- */
- if (cfg_expect_failure)
- return rx != 0;
- else
- return rx != tx;
-}
-
-
-static void __attribute__((noreturn)) usage(const char *filepath)
-{
- fprintf(stderr, "Usage: %s [-e gre|gue|bare|none] [-i 4|6] [-l len] "
- "[-O 4|6] [-o 4|6] [-n num] [-t secs] [-R] [-T] "
- "[-s <osrc> [-d <odst>] [-S <isrc>] [-D <idst>] "
- "[-x <otos>] [-X <itos>] [-f <isport>] [-F]\n",
- filepath);
- exit(1);
-}
-
-static void parse_addr(int family, void *addr, const char *optarg)
-{
- int ret;
-
- ret = inet_pton(family, optarg, addr);
- if (ret == -1)
- error(1, errno, "inet_pton");
- if (ret == 0)
- error(1, 0, "inet_pton: bad string");
-}
-
-static void parse_addr4(struct sockaddr_in *addr, const char *optarg)
-{
- parse_addr(AF_INET, &addr->sin_addr, optarg);
-}
-
-static void parse_addr6(struct sockaddr_in6 *addr, const char *optarg)
-{
- parse_addr(AF_INET6, &addr->sin6_addr, optarg);
-}
-
-static int parse_protocol_family(const char *filepath, const char *optarg)
-{
- if (!strcmp(optarg, "4"))
- return PF_INET;
- if (!strcmp(optarg, "6"))
- return PF_INET6;
-
- usage(filepath);
-}
-
-static void parse_opts(int argc, char **argv)
-{
- int c;
-
- while ((c = getopt(argc, argv, "d:D:e:f:Fhi:l:n:o:O:Rs:S:t:Tx:X:")) != -1) {
- switch (c) {
- case 'd':
- if (cfg_l3_outer == AF_UNSPEC)
- error(1, 0, "-d must be preceded by -o");
- if (cfg_l3_outer == AF_INET)
- parse_addr4(&out_daddr4, optarg);
- else
- parse_addr6(&out_daddr6, optarg);
- break;
- case 'D':
- if (cfg_l3_inner == AF_UNSPEC)
- error(1, 0, "-D must be preceded by -i");
- if (cfg_l3_inner == AF_INET)
- parse_addr4(&in_daddr4, optarg);
- else
- parse_addr6(&in_daddr6, optarg);
- break;
- case 'e':
- if (!strcmp(optarg, "gre"))
- cfg_encap_proto = IPPROTO_GRE;
- else if (!strcmp(optarg, "gue"))
- cfg_encap_proto = IPPROTO_UDP;
- else if (!strcmp(optarg, "bare"))
- cfg_encap_proto = IPPROTO_IPIP;
- else if (!strcmp(optarg, "none"))
- cfg_encap_proto = IPPROTO_IP; /* == 0 */
- else
- usage(argv[0]);
- break;
- case 'f':
- cfg_src_port = strtol(optarg, NULL, 0);
- break;
- case 'F':
- cfg_expect_failure = true;
- break;
- case 'h':
- usage(argv[0]);
- break;
- case 'i':
- if (!strcmp(optarg, "4"))
- cfg_l3_inner = PF_INET;
- else if (!strcmp(optarg, "6"))
- cfg_l3_inner = PF_INET6;
- else
- usage(argv[0]);
- break;
- case 'l':
- cfg_payload_len = strtol(optarg, NULL, 0);
- break;
- case 'n':
- cfg_num_pkt = strtol(optarg, NULL, 0);
- break;
- case 'o':
- cfg_l3_outer = parse_protocol_family(argv[0], optarg);
- break;
- case 'O':
- cfg_l3_extra = parse_protocol_family(argv[0], optarg);
- break;
- case 'R':
- cfg_only_rx = true;
- break;
- case 's':
- if (cfg_l3_outer == AF_INET)
- parse_addr4(&out_saddr4, optarg);
- else
- parse_addr6(&out_saddr6, optarg);
- break;
- case 'S':
- if (cfg_l3_inner == AF_INET)
- parse_addr4(&in_saddr4, optarg);
- else
- parse_addr6(&in_saddr6, optarg);
- break;
- case 't':
- cfg_num_secs = strtol(optarg, NULL, 0);
- break;
- case 'T':
- cfg_only_tx = true;
- break;
- case 'x':
- cfg_dsfield_outer = strtol(optarg, NULL, 0);
- break;
- case 'X':
- cfg_dsfield_inner = strtol(optarg, NULL, 0);
- break;
- }
- }
-
- if (cfg_only_rx && cfg_only_tx)
- error(1, 0, "options: cannot combine rx-only and tx-only");
-
- if (cfg_encap_proto && cfg_l3_outer == AF_UNSPEC)
- error(1, 0, "options: must specify outer with encap");
- else if ((!cfg_encap_proto) && cfg_l3_outer != AF_UNSPEC)
- error(1, 0, "options: cannot combine no-encap and outer");
- else if ((!cfg_encap_proto) && cfg_l3_extra != AF_UNSPEC)
- error(1, 0, "options: cannot combine no-encap and extra");
-
- if (cfg_l3_inner == AF_UNSPEC)
- cfg_l3_inner = AF_INET6;
- if (cfg_l3_inner == AF_INET6 && cfg_encap_proto == IPPROTO_IPIP)
- cfg_encap_proto = IPPROTO_IPV6;
-
- /* RFC 6040 4.2:
- * on decap, if outer encountered congestion (CE == 0x3),
- * but inner cannot encode ECN (NoECT == 0x0), then drop packet.
- */
- if (((cfg_dsfield_outer & 0x3) == 0x3) &&
- ((cfg_dsfield_inner & 0x3) == 0x0))
- cfg_expect_failure = true;
-}
-
-static void print_opts(void)
-{
- if (cfg_l3_inner == PF_INET6) {
- util_printaddr("inner.dest6", (void *) &in_daddr6);
- util_printaddr("inner.source6", (void *) &in_saddr6);
- } else {
- util_printaddr("inner.dest4", (void *) &in_daddr4);
- util_printaddr("inner.source4", (void *) &in_saddr4);
- }
-
- if (!cfg_l3_outer)
- return;
-
- fprintf(stderr, "encap proto: %u\n", cfg_encap_proto);
-
- if (cfg_l3_outer == PF_INET6) {
- util_printaddr("outer.dest6", (void *) &out_daddr6);
- util_printaddr("outer.source6", (void *) &out_saddr6);
- } else {
- util_printaddr("outer.dest4", (void *) &out_daddr4);
- util_printaddr("outer.source4", (void *) &out_saddr4);
- }
-
- if (!cfg_l3_extra)
- return;
-
- if (cfg_l3_extra == PF_INET6) {
- util_printaddr("extra.dest6", (void *) &extra_daddr6);
- util_printaddr("extra.source6", (void *) &extra_saddr6);
- } else {
- util_printaddr("extra.dest4", (void *) &extra_daddr4);
- util_printaddr("extra.source4", (void *) &extra_saddr4);
- }
-
-}
-
-int main(int argc, char **argv)
-{
- parse_opts(argc, argv);
- print_opts();
- return do_main();
-}
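For reference, the checksum helpers in the removed flow dissector test above implement the standard RFC 1071 Internet checksum: sum 16-bit halfwords, fold the carries back into the low 16 bits, and take the ones' complement. A minimal hedged sketch of the arithmetic, reusing build_ip_csum() from above; the verifier function name is illustrative and not part of the patch:

	/*
	 * Fold example: a halfword sum of 0x1a2b3 folds to 0xa2b3 + 0x1 = 0xa2b4,
	 * and ~0xa2b4 = 0x5d4b is the value stored in the checksum field.
	 */
	static int ipv4_header_csum_ok(const struct iphdr *iph)
	{
		/* Re-summing a header that already carries its checksum gives
		 * 0xffff before the final complement, so the helper returns 0.
		 */
		return build_ip_csum((const void *)iph, iph->ihl << 1, 0) == 0;
	}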
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
deleted file mode 100755
index 4b298863797a..000000000000
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ /dev/null
@@ -1,178 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Load BPF flow dissector and verify it correctly dissects traffic
-
-BPF_FILE="bpf_flow.bpf.o"
-export TESTNAME=test_flow_dissector
-unmount=0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-# This test needs to be run in a network namespace with in_netns.sh. Check if
-# this is the case and run it with in_netns.sh if it is being run in the root
-# namespace.
-if [[ -z $(ip netns identify $$) ]]; then
- err=0
- if bpftool="$(which bpftool)"; then
- echo "Testing global flow dissector..."
-
- $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \
- type flow_dissector
-
- if ! unshare --net $bpftool prog attach pinned \
- /sys/fs/bpf/flow/_dissect flow_dissector; then
- echo "Unexpected unsuccessful attach in namespace" >&2
- err=1
- fi
-
- $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \
- flow_dissector
-
- if unshare --net $bpftool prog attach pinned \
- /sys/fs/bpf/flow/_dissect flow_dissector; then
- echo "Unexpected successful attach in namespace" >&2
- err=1
- fi
-
- if ! $bpftool prog detach pinned \
- /sys/fs/bpf/flow/_dissect flow_dissector; then
- echo "Failed to detach flow dissector" >&2
- err=1
- fi
-
- rm -rf /sys/fs/bpf/flow
- else
- echo "Skipping root flow dissector test, bpftool not found" >&2
- fi
-
- # Run the rest of the tests in a net namespace.
- ../net/in_netns.sh "$0" "$@"
- err=$(( $err + $? ))
-
- if (( $err == 0 )); then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- exit $err
-fi
-
-# Determine selftest success via shell exit code
-exit_handler()
-{
- set +e
-
- # Cleanup
- tc filter del dev lo ingress pref 1337 2> /dev/null
- tc qdisc del dev lo ingress 2> /dev/null
- ./flow_dissector_load -d 2> /dev/null
- if [ $unmount -ne 0 ]; then
- umount bpffs 2> /dev/null
- fi
-}
-
-# Exit the script immediately (caught by the trap handler) if any
-# command exits with a non-zero status.
-set -e
-
-# (Use 'trap -l' to list meaning of numbers)
-trap exit_handler 0 2 3 6 9
-
-# Mount BPF file system
-if /bin/mount | grep /sys/fs/bpf > /dev/null; then
- echo "bpffs already mounted"
-else
- echo "bpffs not mounted. Mounting..."
- unmount=1
- /bin/mount bpffs /sys/fs/bpf -t bpf
-fi
-
-# Attach BPF program
-./flow_dissector_load -p $BPF_FILE -s _dissect
-
-# Setup
-tc qdisc add dev lo ingress
-echo 0 > /proc/sys/net/ipv4/conf/default/rp_filter
-echo 0 > /proc/sys/net/ipv4/conf/all/rp_filter
-echo 0 > /proc/sys/net/ipv4/conf/lo/rp_filter
-
-echo "Testing IPv4..."
-# Drops all IP/UDP packets coming from port 9
-tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
- udp src_port 9 action drop
-
-# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 4 -f 8
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -f 9 -F
-# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 4 -f 10
-
-echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..."
-# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 4 -S 127.0.0.127 -f 8
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F
-# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 4 -S 127.0.0.127 -f 10
-
-echo "Testing IPIP..."
-# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 8
-# Send 10 IPv4/IPv4/UDP packets from port 9. Filter should drop all.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
-# Send 10 IPv4/IPv4/UDP packets from port 10. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 10
-
-echo "Testing IPv4 + GRE..."
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 8. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 8
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 9. Filter should drop all.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 9 -F
-# Send 10 IPv4/GRE/IPv4/UDP packets from port 10. Filter should not drop any.
-./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e gre -i 4 \
- -D 192.168.0.1 -S 1.1.1.1 -f 10
-
-tc filter del dev lo ingress pref 1337
-
-echo "Testing port range..."
-# Drops all IP/UDP packets coming from port 8-10
-tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
- udp src_port 8-10 action drop
-
-# Send 10 IPv4/UDP packets from port 7. Filter should not drop any.
-./test_flow_dissector -i 4 -f 7
-# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 4 -f 9 -F
-# Send 10 IPv4/UDP packets from port 11. Filter should not drop any.
-./test_flow_dissector -i 4 -f 11
-
-tc filter del dev lo ingress pref 1337
-
-echo "Testing IPv6..."
-# Drops all IPv6/UDP packets coming from port 9
-tc filter add dev lo parent ffff: protocol ipv6 pref 1337 flower ip_proto \
- udp src_port 9 action drop
-
-# Send 10 IPv6/UDP packets from port 8. Filter should not drop any.
-./test_flow_dissector -i 6 -f 8
-# Send 10 IPv6/UDP packets from port 9. Filter should drop all.
-./test_flow_dissector -i 6 -f 9 -F
-# Send 10 IPv6/UDP packets from port 10. Filter should not drop any.
-./test_flow_dissector -i 6 -f 10
-
-exit 0
diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/test_kmods/.gitignore
index ded513777281..ded513777281 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/.gitignore
+++ b/tools/testing/selftests/bpf/test_kmods/.gitignore
diff --git a/tools/testing/selftests/bpf/test_kmods/Makefile b/tools/testing/selftests/bpf/test_kmods/Makefile
new file mode 100644
index 000000000000..63c4d3f6a12f
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/Makefile
@@ -0,0 +1,21 @@
+TEST_KMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(TEST_KMOD_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_testmod.ko bpf_test_no_cfi.ko bpf_test_modorder_x.ko \
+ bpf_test_modorder_y.ko bpf_test_rqspinlock.ko
+
+$(foreach m,$(MODULES),$(eval obj-m += $(m:.ko=.o)))
+
+CFLAGS_bpf_testmod.o = -I$(src)
+
+all:
+ $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) modules
+
+clean:
+ $(Q)$(MAKE) -C $(KDIR) M=$(TEST_KMOD_DIR) clean
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c
index 0cc747fa912f..0cc747fa912f 100644
--- a/tools/testing/selftests/bpf/bpf_test_modorder_x/bpf_test_modorder_x.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_x.c
diff --git a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c
index c627ee085d13..c627ee085d13 100644
--- a/tools/testing/selftests/bpf/bpf_test_modorder_y/bpf_test_modorder_y.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_modorder_y.c
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c
index 948eb3962732..948eb3962732 100644
--- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_no_cfi.c
diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
new file mode 100644
index 000000000000..7b4ae5e81d32
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_test_rqspinlock.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/prandom.h>
+#include <linux/ktime.h>
+#include <asm/rqspinlock.h>
+#include <linux/perf_event.h>
+#include <linux/kthread.h>
+#include <linux/atomic.h>
+#include <linux/slab.h>
+
+static struct perf_event_attr hw_attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .size = sizeof(struct perf_event_attr),
+ .pinned = 1,
+ .disabled = 1,
+ .sample_period = 100000,
+};
+
+static rqspinlock_t lock_a;
+static rqspinlock_t lock_b;
+static rqspinlock_t lock_c;
+
+#define RQSL_SLOW_THRESHOLD_MS 10
+static const unsigned int rqsl_hist_ms[] = {
+ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
+ 12, 14, 16, 18, 20, 25, 30, 40, 50, 75,
+ 100, 150, 200, 250, 1000,
+};
+#define RQSL_NR_HIST_BUCKETS ARRAY_SIZE(rqsl_hist_ms)
+
+enum rqsl_context {
+ RQSL_CTX_NORMAL = 0,
+ RQSL_CTX_NMI,
+ RQSL_CTX_MAX,
+};
+
+struct rqsl_cpu_hist {
+ atomic64_t hist[RQSL_CTX_MAX][RQSL_NR_HIST_BUCKETS];
+ atomic64_t success[RQSL_CTX_MAX];
+ atomic64_t failure[RQSL_CTX_MAX];
+};
+
+static DEFINE_PER_CPU(struct rqsl_cpu_hist, rqsl_cpu_hists);
+
+enum rqsl_mode {
+ RQSL_MODE_AA = 0,
+ RQSL_MODE_ABBA,
+ RQSL_MODE_ABBCCA,
+};
+
+static int test_mode = RQSL_MODE_AA;
+module_param(test_mode, int, 0644);
+MODULE_PARM_DESC(test_mode,
+ "rqspinlock test mode: 0 = AA, 1 = ABBA, 2 = ABBCCA");
+
+static int normal_delay = 20;
+module_param(normal_delay, int, 0644);
+MODULE_PARM_DESC(normal_delay,
+ "rqspinlock critical section length for normal context (20ms default)");
+
+static int nmi_delay = 10;
+module_param(nmi_delay, int, 0644);
+MODULE_PARM_DESC(nmi_delay,
+ "rqspinlock critical section length for NMI context (10ms default)");
+
+static struct perf_event **rqsl_evts;
+static int rqsl_nevts;
+
+static struct task_struct **rqsl_threads;
+static int rqsl_nthreads;
+static atomic_t rqsl_ready_cpus = ATOMIC_INIT(0);
+
+static int pause = 0;
+
+static const char *rqsl_mode_names[] = {
+ [RQSL_MODE_AA] = "AA",
+ [RQSL_MODE_ABBA] = "ABBA",
+ [RQSL_MODE_ABBCCA] = "ABBCCA",
+};
+
+struct rqsl_lock_pair {
+ rqspinlock_t *worker_lock;
+ rqspinlock_t *nmi_lock;
+};
+
+static struct rqsl_lock_pair rqsl_get_lock_pair(int cpu)
+{
+ int mode = READ_ONCE(test_mode);
+
+ switch (mode) {
+ default:
+ case RQSL_MODE_AA:
+ return (struct rqsl_lock_pair){ &lock_a, &lock_a };
+ case RQSL_MODE_ABBA:
+ if (cpu & 1)
+ return (struct rqsl_lock_pair){ &lock_b, &lock_a };
+ return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+ case RQSL_MODE_ABBCCA:
+ switch (cpu % 3) {
+ case 0:
+ return (struct rqsl_lock_pair){ &lock_a, &lock_b };
+ case 1:
+ return (struct rqsl_lock_pair){ &lock_b, &lock_c };
+ default:
+ return (struct rqsl_lock_pair){ &lock_c, &lock_a };
+ }
+ }
+}
+
+static u32 rqsl_hist_bucket_idx(u32 delta_ms)
+{
+ int i;
+
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ if (delta_ms <= rqsl_hist_ms[i])
+ return i;
+ }
+
+ return RQSL_NR_HIST_BUCKETS - 1;
+}
+
+static void rqsl_record_lock_result(u64 delta_ns, enum rqsl_context ctx, int ret)
+{
+ struct rqsl_cpu_hist *hist = this_cpu_ptr(&rqsl_cpu_hists);
+ u32 delta_ms = DIV_ROUND_UP_ULL(delta_ns, NSEC_PER_MSEC);
+ u32 bucket = rqsl_hist_bucket_idx(delta_ms);
+ atomic64_t *buckets = hist->hist[ctx];
+
+ atomic64_inc(&buckets[bucket]);
+ if (!ret)
+ atomic64_inc(&hist->success[ctx]);
+ else
+ atomic64_inc(&hist->failure[ctx]);
+}
+
+static int rqspinlock_worker_fn(void *arg)
+{
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 start_ns;
+ int ret;
+
+ if (cpu) {
+ atomic_inc(&rqsl_ready_cpus);
+
+ while (!kthread_should_stop()) {
+ struct rqsl_lock_pair locks = rqsl_get_lock_pair(cpu);
+ rqspinlock_t *worker_lock = locks.worker_lock;
+
+ if (READ_ONCE(pause)) {
+ msleep(1000);
+ continue;
+ }
+ start_ns = ktime_get_mono_fast_ns();
+ ret = raw_res_spin_lock_irqsave(worker_lock, flags);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NORMAL, ret);
+ mdelay(normal_delay);
+ if (!ret)
+ raw_res_spin_unlock_irqrestore(worker_lock, flags);
+ cpu_relax();
+ }
+ return 0;
+ }
+
+ while (!kthread_should_stop()) {
+ int expected = rqsl_nthreads > 0 ? rqsl_nthreads - 1 : 0;
+ int ready = atomic_read(&rqsl_ready_cpus);
+
+ if (ready == expected && !READ_ONCE(pause)) {
+ for (int i = 0; i < rqsl_nevts; i++)
+ perf_event_enable(rqsl_evts[i]);
+ pr_err("Waiting 5 secs to pause the test\n");
+ msleep(1000 * 5);
+ WRITE_ONCE(pause, 1);
+ pr_err("Paused the test\n");
+ } else {
+ msleep(1000);
+ cpu_relax();
+ }
+ }
+ return 0;
+}
+
+static void nmi_cb(struct perf_event *event, struct perf_sample_data *data,
+ struct pt_regs *regs)
+{
+ struct rqsl_lock_pair locks;
+ int cpu = smp_processor_id();
+ unsigned long flags;
+ u64 start_ns;
+ int ret;
+
+ if (!cpu || READ_ONCE(pause))
+ return;
+
+ locks = rqsl_get_lock_pair(cpu);
+ start_ns = ktime_get_mono_fast_ns();
+ ret = raw_res_spin_lock_irqsave(locks.nmi_lock, flags);
+ rqsl_record_lock_result(ktime_get_mono_fast_ns() - start_ns,
+ RQSL_CTX_NMI, ret);
+
+ mdelay(nmi_delay);
+
+ if (!ret)
+ raw_res_spin_unlock_irqrestore(locks.nmi_lock, flags);
+}
+
+static void free_rqsl_threads(void)
+{
+ int i;
+
+ if (rqsl_threads) {
+ for_each_online_cpu(i) {
+ if (rqsl_threads[i])
+ kthread_stop(rqsl_threads[i]);
+ }
+ kfree(rqsl_threads);
+ }
+}
+
+static void free_rqsl_evts(void)
+{
+ int i;
+
+ if (rqsl_evts) {
+ for (i = 0; i < rqsl_nevts; i++) {
+ if (rqsl_evts[i])
+ perf_event_release_kernel(rqsl_evts[i]);
+ }
+ kfree(rqsl_evts);
+ }
+}
+
+static int bpf_test_rqspinlock_init(void)
+{
+ int i, ret;
+ int ncpus = num_online_cpus();
+
+ if (test_mode < RQSL_MODE_AA || test_mode > RQSL_MODE_ABBCCA) {
+ pr_err("Invalid mode %d\n", test_mode);
+ return -EINVAL;
+ }
+
+ pr_err("Mode = %s\n", rqsl_mode_names[test_mode]);
+
+ if (ncpus < test_mode + 2)
+ return -ENOTSUPP;
+
+ raw_res_spin_lock_init(&lock_a);
+ raw_res_spin_lock_init(&lock_b);
+ raw_res_spin_lock_init(&lock_c);
+
+ rqsl_evts = kcalloc(ncpus - 1, sizeof(*rqsl_evts), GFP_KERNEL);
+ if (!rqsl_evts)
+ return -ENOMEM;
+ rqsl_nevts = ncpus - 1;
+
+ for (i = 1; i < ncpus; i++) {
+ struct perf_event *e;
+
+ e = perf_event_create_kernel_counter(&hw_attr, i, NULL, nmi_cb, NULL);
+ if (IS_ERR(e)) {
+ ret = PTR_ERR(e);
+ goto err_perf_events;
+ }
+ rqsl_evts[i - 1] = e;
+ }
+
+ rqsl_threads = kcalloc(ncpus, sizeof(*rqsl_threads), GFP_KERNEL);
+ if (!rqsl_threads) {
+ ret = -ENOMEM;
+ goto err_perf_events;
+ }
+ rqsl_nthreads = ncpus;
+
+ for_each_online_cpu(i) {
+ struct task_struct *t;
+
+ t = kthread_create(rqspinlock_worker_fn, NULL, "rqsl_w/%d", i);
+ if (IS_ERR(t)) {
+ ret = PTR_ERR(t);
+ goto err_threads_create;
+ }
+ kthread_bind(t, i);
+ rqsl_threads[i] = t;
+ wake_up_process(t);
+ }
+ return 0;
+
+err_threads_create:
+ free_rqsl_threads();
+err_perf_events:
+ free_rqsl_evts();
+ return ret;
+}
+
+module_init(bpf_test_rqspinlock_init);
+
+static void rqsl_print_histograms(void)
+{
+ int cpu, i;
+
+ pr_err("rqspinlock acquisition latency histogram (ms):\n");
+
+ for_each_online_cpu(cpu) {
+ struct rqsl_cpu_hist *hist = per_cpu_ptr(&rqsl_cpu_hists, cpu);
+ u64 norm_counts[RQSL_NR_HIST_BUCKETS];
+ u64 nmi_counts[RQSL_NR_HIST_BUCKETS];
+ u64 total_counts[RQSL_NR_HIST_BUCKETS];
+ u64 norm_success, nmi_success, success_total;
+ u64 norm_failure, nmi_failure, failure_total;
+ u64 norm_total = 0, nmi_total = 0, total = 0;
+ bool has_slow = false;
+
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ norm_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NORMAL][i]);
+ nmi_counts[i] = atomic64_read(&hist->hist[RQSL_CTX_NMI][i]);
+ total_counts[i] = norm_counts[i] + nmi_counts[i];
+ norm_total += norm_counts[i];
+ nmi_total += nmi_counts[i];
+ total += total_counts[i];
+ if (rqsl_hist_ms[i] > RQSL_SLOW_THRESHOLD_MS &&
+ total_counts[i])
+ has_slow = true;
+ }
+
+ norm_success = atomic64_read(&hist->success[RQSL_CTX_NORMAL]);
+ nmi_success = atomic64_read(&hist->success[RQSL_CTX_NMI]);
+ norm_failure = atomic64_read(&hist->failure[RQSL_CTX_NORMAL]);
+ nmi_failure = atomic64_read(&hist->failure[RQSL_CTX_NMI]);
+ success_total = norm_success + nmi_success;
+ failure_total = norm_failure + nmi_failure;
+
+ if (!total)
+ continue;
+
+ if (!has_slow) {
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu), all within 0-%ums\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure,
+ RQSL_SLOW_THRESHOLD_MS);
+ continue;
+ }
+
+ pr_err(" cpu%d: total %llu (normal %llu, nmi %llu) | "
+ "success %llu (normal %llu, nmi %llu) | "
+ "failure %llu (normal %llu, nmi %llu)\n",
+ cpu, total, norm_total, nmi_total,
+ success_total, norm_success, nmi_success,
+ failure_total, norm_failure, nmi_failure);
+ for (i = 0; i < RQSL_NR_HIST_BUCKETS; i++) {
+ unsigned int start_ms;
+
+ if (!total_counts[i])
+ continue;
+
+ start_ms = i == 0 ? 0 : rqsl_hist_ms[i - 1] + 1;
+ if (i == RQSL_NR_HIST_BUCKETS - 1) {
+ pr_err(" >= %ums: total %llu (normal %llu, nmi %llu)\n",
+ start_ms, total_counts[i],
+ norm_counts[i], nmi_counts[i]);
+ } else {
+ pr_err(" %u-%ums: total %llu (normal %llu, nmi %llu)\n",
+ start_ms, rqsl_hist_ms[i],
+ total_counts[i],
+ norm_counts[i], nmi_counts[i]);
+ }
+ }
+ }
+}
+
+static void bpf_test_rqspinlock_exit(void)
+{
+ WRITE_ONCE(pause, 1);
+ free_rqsl_threads();
+ free_rqsl_evts();
+ rqsl_print_histograms();
+}
+
+module_exit(bpf_test_rqspinlock_exit);
+
+MODULE_AUTHOR("Kumar Kartikeya Dwivedi");
+MODULE_DESCRIPTION("BPF rqspinlock stress test module");
+MODULE_LICENSE("GPL");
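As a usage note, a minimal hedged sketch of the acquisition pattern this module stress-tests (the function name is illustrative): unlike a plain raw_spin_lock_irqsave(), raw_res_spin_lock_irqsave() can fail, for example when the AA/ABBA/ABBCCA cycles exercised above are detected or a timeout expires, so callers must check the return value and only unlock on success:

	static void rqsl_usage_example(rqspinlock_t *lock)
	{
		unsigned long flags;
		int ret;

		ret = raw_res_spin_lock_irqsave(lock, flags);
		if (ret)
			return;	/* acquisition aborted, nothing to unlock */

		/* ... critical section ... */

		raw_res_spin_unlock_irqrestore(lock, flags);
	}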
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
index aeef86b3da74..aeef86b3da74 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod-events.h
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
index cc9dde507aba..1669a7eeda26 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c
@@ -62,6 +62,18 @@ struct bpf_testmod_struct_arg_5 {
long d;
};
+union bpf_testmod_union_arg_1 {
+ char a;
+ short b;
+ struct bpf_testmod_struct_arg_1 arg;
+};
+
+union bpf_testmod_union_arg_2 {
+ int a;
+ long b;
+ struct bpf_testmod_struct_arg_2 arg;
+};
+
__bpf_hook_start();
noinline int
@@ -129,11 +141,29 @@ bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
}
noinline int
+bpf_testmod_test_union_arg_1(union bpf_testmod_union_arg_1 a, int b, int c)
+{
+ bpf_testmod_test_struct_arg_result = a.arg.a + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_union_arg_2(int a, union bpf_testmod_union_arg_2 b)
+{
+ bpf_testmod_test_struct_arg_result = a + b.arg.a + b.arg.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
bpf_testmod_test_struct_arg_result = a->a;
return bpf_testmod_test_struct_arg_result;
}
+__weak noinline void bpf_testmod_looooooooooooooooooooooooooooooong_name(void)
+{
+}
+
__bpf_kfunc void
bpf_testmod_test_mod_kfunc(int i)
{
@@ -214,6 +244,16 @@ __bpf_kfunc void bpf_kfunc_rcu_task_test(struct task_struct *ptr)
{
}
+__bpf_kfunc struct task_struct *bpf_kfunc_ret_rcu_test(void)
+{
+ return NULL;
+}
+
+__bpf_kfunc int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size)
+{
+ return NULL;
+}
+
__bpf_kfunc struct bpf_testmod_ctx *
bpf_testmod_ctx_create(int *err)
{
@@ -377,11 +417,35 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
}
+noinline void bpf_testmod_stacktrace_test(void)
+{
+ /* used for stacktrace test as attach function */
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_3(void)
+{
+ bpf_testmod_stacktrace_test();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_2(void)
+{
+ bpf_testmod_stacktrace_test_3();
+ asm volatile ("");
+}
+
+noinline void bpf_testmod_stacktrace_test_1(void)
+{
+ bpf_testmod_stacktrace_test_2();
+ asm volatile ("");
+}
+
int bpf_testmod_fentry_ok;
noinline ssize_t
bpf_testmod_test_read(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_read_ctx ctx = {
@@ -394,6 +458,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_3 *struct_arg3;
struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
+ union bpf_testmod_union_arg_1 union_arg1 = { .arg = {1} };
+ union bpf_testmod_union_arg_2 union_arg2 = { .arg = {2, 3} };
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -411,9 +477,12 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
21, 22, struct_arg5, 27);
+ (void)bpf_testmod_test_union_arg_1(union_arg1, 4, 5);
+ (void)bpf_testmod_test_union_arg_2(6, union_arg2);
+
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
- (void)trace_bpf_testmod_test_raw_tp_null(NULL);
+ (void)trace_bpf_testmod_test_raw_tp_null_tp(NULL);
bpf_testmod_test_struct_ops3();
@@ -431,14 +500,14 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
if (bpf_testmod_loop_test(101) > 100)
trace_bpf_testmod_test_read(current, &ctx);
- trace_bpf_testmod_test_nullable_bare(NULL);
+ trace_bpf_testmod_test_nullable_bare_tp(NULL);
/* Magic number to enable writable tp */
if (len == 64) {
struct bpf_testmod_test_writable_ctx writable = {
.val = 1024,
};
- trace_bpf_testmod_test_writable_bare(&writable);
+ trace_bpf_testmod_test_writable_bare_tp(&writable);
if (writable.early_ret)
return snprintf(buf, len, "%d\n", writable.val);
}
@@ -452,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
21, 22, 23, 24, 25, 26) != 231)
goto out;
+ bpf_testmod_stacktrace_test_1();
+
bpf_testmod_fentry_ok = 1;
out:
return -EIO; /* always fail */
@@ -461,7 +532,7 @@ ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
noinline ssize_t
bpf_testmod_test_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
struct bpf_testmod_test_write_ctx ctx = {
@@ -470,7 +541,7 @@ bpf_testmod_test_write(struct file *file, struct kobject *kobj,
.len = len,
};
- trace_bpf_testmod_test_write_bare(current, &ctx);
+ trace_bpf_testmod_test_write_bare_tp(current, &ctx);
return -EIO; /* always fail */
}
@@ -497,14 +568,20 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
#ifdef __x86_64__
static int
+uprobe_handler(struct uprobe_consumer *self, struct pt_regs *regs, __u64 *data)
+{
+ regs->cx = 0x87654321feebdaed;
+ return 0;
+}
+
+static int
uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
struct pt_regs *regs, __u64 *data)
{
regs->ax = 0x12345678deadbeef;
- regs->cx = 0x87654321feebdaed;
regs->r11 = (u64) -1;
- return true;
+ return 0;
}
struct testmod_uprobe {
@@ -516,6 +593,7 @@ struct testmod_uprobe {
static DEFINE_MUTEX(testmod_uprobe_mutex);
static struct testmod_uprobe uprobe = {
+ .consumer.handler = uprobe_handler,
.consumer.ret_handler = uprobe_ret_handler,
};
@@ -563,7 +641,7 @@ static void testmod_unregister_uprobe(void)
static ssize_t
bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj,
- struct bin_attribute *bin_attr,
+ const struct bin_attribute *bin_attr,
char *buf, loff_t off, size_t len)
{
unsigned long offset = 0;
@@ -619,6 +697,8 @@ BTF_ID_FLAGS(func, bpf_kfunc_trusted_vma_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_task_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_trusted_num_test, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_rcu_task_test, KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test, KF_RET_NULL | KF_RCU_PROTECTED)
+BTF_ID_FLAGS(func, bpf_kfunc_ret_rcu_test_nostruct, KF_RET_NULL | KF_RCU_PROTECTED)
BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_ID_FLAGS(func, bpf_testmod_ops3_call_test_1)
@@ -846,7 +926,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
goto out;
}
- err = kernel_connect(sock, (struct sockaddr *)&args->addr,
+ err = kernel_connect(sock, (struct sockaddr_unsized *)&args->addr,
args->addrlen, 0);
out:
mutex_unlock(&sock_lock);
@@ -869,7 +949,7 @@ __bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
goto out;
}
- err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen);
+ err = kernel_bind(sock, (struct sockaddr_unsized *)&args->addr, args->addrlen);
out:
mutex_unlock(&sock_lock);
@@ -1053,6 +1133,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args)
return args->a;
}
+__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id);
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -1093,6 +1175,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE)
BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -1130,6 +1213,7 @@ static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
};
static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .get_func_proto = bpf_base_func_proto,
.is_valid_access = bpf_testmod_ops_is_valid_access,
};
@@ -1176,10 +1260,25 @@ static int bpf_testmod_ops__test_maybe_null(int dummy,
return 0;
}
+static int bpf_testmod_ops__test_refcounted(int dummy,
+ struct task_struct *task__ref)
+{
+ return 0;
+}
+
+static struct task_struct *
+bpf_testmod_ops__test_return_ref_kptr(int dummy, struct task_struct *task__ref,
+ struct cgroup *cgrp)
+{
+ return NULL;
+}
+
static struct bpf_testmod_ops __bpf_testmod_ops = {
.test_1 = bpf_testmod_test_1,
.test_2 = bpf_testmod_test_2,
.test_maybe_null = bpf_testmod_ops__test_maybe_null,
+ .test_refcounted = bpf_testmod_ops__test_refcounted,
+ .test_return_ref_kptr = bpf_testmod_ops__test_return_ref_kptr,
};
struct bpf_struct_ops bpf_bpf_testmod_ops = {
@@ -1293,6 +1392,85 @@ static int bpf_test_mod_st_ops__test_pro_epilogue(struct st_ops_args *args)
return 0;
}
+static int bpf_cgroup_from_id_id;
+static int bpf_cgroup_release_id;
+
+static int st_ops_gen_prologue_with_kfunc(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r8 = r1; // r8 will be "u64 *ctx".
+ * r1 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+5;
+ * r6 = r8[0]; // r6 will be "struct st_ops *args".
+ * r7 = r6->a;
+ * r7 += 1000;
+ * r6->a = r7;
+ * goto pc+2;
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r1 = r8;
+ */
+ *insn++ = BPF_MOV64_REG(BPF_REG_8, BPF_REG_1);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 5);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_8, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 1000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_8);
+ *insn++ = prog->insnsi[0];
+
+ return insn - insn_buf;
+}
+
+static int st_ops_gen_epilogue_with_kfunc(struct bpf_insn *insn_buf, const struct bpf_prog *prog,
+ s16 ctx_stack_off)
+{
+ struct bpf_insn *insn = insn_buf;
+
+ /* r1 = 0;
+ * r6 = 0;
+ * r0 = bpf_cgroup_from_id(r1);
+ * if r0 != 0 goto pc+6;
+ * r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
+ * r1 = r1[0]; // r1 will be "struct st_ops *args"
+ * r6 = r1->a;
+ * r6 += 10000;
+ * r1->a = r6;
+ * goto pc+2
+ * r1 = r0;
+ * bpf_cgroup_release(r1);
+ * r0 = r6;
+ * r0 *= 2;
+ * BPF_EXIT;
+ */
+ *insn++ = BPF_MOV64_IMM(BPF_REG_1, 0);
+ *insn++ = BPF_MOV64_IMM(BPF_REG_6, 0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_from_id_id);
+ *insn++ = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 6);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_FP, ctx_stack_off);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0);
+ *insn++ = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 10000);
+ *insn++ = BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, offsetof(struct st_ops_args, a));
+ *insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 2);
+ *insn++ = BPF_MOV64_REG(BPF_REG_1, BPF_REG_0);
+ *insn++ = BPF_CALL_KFUNC(0, bpf_cgroup_release_id);
+ *insn++ = BPF_MOV64_REG(BPF_REG_0, BPF_REG_6);
+ *insn++ = BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, 2);
+ *insn++ = BPF_EXIT_INSN();
+
+ return insn - insn_buf;
+}
+
+#define KFUNC_PRO_EPI_PREFIX "test_kfunc_"
static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog)
{
@@ -1302,6 +1480,9 @@ static int st_ops_gen_prologue(struct bpf_insn *insn_buf, bool direct_write,
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_prologue_with_kfunc(insn_buf, direct_write, prog);
+
/* r6 = r1[0]; // r6 will be "struct st_ops *args". r1 is "u64 *ctx".
* r7 = r6->a;
* r7 += 1000;
@@ -1325,6 +1506,9 @@ static int st_ops_gen_epilogue(struct bpf_insn *insn_buf, const struct bpf_prog
strcmp(prog->aux->attach_func_name, "test_pro_epilogue"))
return 0;
+ if (!strncmp(prog->aux->name, KFUNC_PRO_EPI_PREFIX, strlen(KFUNC_PRO_EPI_PREFIX)))
+ return st_ops_gen_epilogue_with_kfunc(insn_buf, prog, ctx_stack_off);
+
/* r1 = stack[ctx_stack_off]; // r1 will be "u64 *ctx"
* r1 = r1[0]; // r1 will be "struct st_ops *args"
* r6 = r1->a;
@@ -1395,6 +1579,13 @@ static void st_ops_unreg(void *kdata, struct bpf_link *link)
static int st_ops_init(struct btf *btf)
{
+ struct btf *kfunc_btf;
+
+ bpf_cgroup_from_id_id = bpf_find_btf_id("bpf_cgroup_from_id", BTF_KIND_FUNC, &kfunc_btf);
+ bpf_cgroup_release_id = bpf_find_btf_id("bpf_cgroup_release", BTF_KIND_FUNC, &kfunc_btf);
+ if (bpf_cgroup_from_id_id < 0 || bpf_cgroup_release_id < 0)
+ return -EINVAL;
+
return 0;
}
@@ -1416,6 +1607,114 @@ static struct bpf_struct_ops testmod_st_ops = {
.owner = THIS_MODULE,
};
+struct hlist_head multi_st_ops_list;
+static DEFINE_SPINLOCK(multi_st_ops_lock);
+
+static int multi_st_ops_init(struct btf *btf)
+{
+ spin_lock_init(&multi_st_ops_lock);
+ INIT_HLIST_HEAD(&multi_st_ops_list);
+
+ return 0;
+}
+
+static int multi_st_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+
+ hlist_for_each_entry(st_ops, &multi_st_ops_list, node) {
+ if (st_ops->id == id)
+ return st_ops;
+ }
+
+ return NULL;
+}
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ int ret = -1;
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ ret = st_ops->test_1(args);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return ret;
+}
+
+static int multi_st_ops_reg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops =
+ (struct bpf_testmod_multi_st_ops *)kdata;
+ unsigned long flags;
+ int err = 0;
+ u32 id;
+
+ if (!st_ops->test_1)
+ return -EINVAL;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ if (multi_st_ops_find_nolock(id)) {
+ pr_err("multi_st_ops(id:%d) has already been registered\n", id);
+ err = -EEXIST;
+ goto unlock;
+ }
+
+ st_ops->id = id;
+ hlist_add_head(&st_ops->node, &multi_st_ops_list);
+unlock:
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+
+ return err;
+}
+
+static void multi_st_ops_unreg(void *kdata, struct bpf_link *link)
+{
+ struct bpf_testmod_multi_st_ops *st_ops;
+ unsigned long flags;
+ u32 id;
+
+ id = bpf_struct_ops_id(kdata);
+
+ spin_lock_irqsave(&multi_st_ops_lock, flags);
+ st_ops = multi_st_ops_find_nolock(id);
+ if (st_ops)
+ hlist_del(&st_ops->node);
+ spin_unlock_irqrestore(&multi_st_ops_lock, flags);
+}
+
+static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args)
+{
+ return 0;
+}
+
+static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = {
+ .test_1 = bpf_testmod_multi_st_ops__test_1,
+};
+
+struct bpf_struct_ops testmod_multi_st_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = multi_st_ops_init,
+ .init_member = multi_st_ops_init_member,
+ .reg = multi_st_ops_reg,
+ .unreg = multi_st_ops_unreg,
+ .cfi_stubs = &multi_st_ops_cfi_stubs,
+ .name = "bpf_testmod_multi_st_ops",
+ .owner = THIS_MODULE,
+};
+
extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
@@ -1438,6 +1737,7 @@ static int bpf_testmod_init(void)
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3);
ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops);
+ ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops);
ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
ARRAY_SIZE(bpf_testmod_dtors),
THIS_MODULE);
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
index 356803d1c10e..f6e492f9d042 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h
@@ -6,6 +6,7 @@
#include <linux/types.h>
struct task_struct;
+struct cgroup;
struct bpf_testmod_test_read_ctx {
char *buf;
@@ -36,6 +37,11 @@ struct bpf_testmod_ops {
/* Used to test nullable arguments. */
int (*test_maybe_null)(int dummy, struct task_struct *task);
int (*unsupported_ops)(void);
+ /* Used to test ref_acquired arguments. */
+ int (*test_refcounted)(int dummy, struct task_struct *task);
+ /* Used to test returning referenced kptr. */
+ struct task_struct *(*test_return_ref_kptr)(int dummy, struct task_struct *task,
+ struct cgroup *cgrp);
/* The following fields are used to test shadow copies. */
char onebyte;
@@ -110,4 +116,10 @@ struct bpf_testmod_st_ops {
struct module *owner;
};
+struct bpf_testmod_multi_st_ops {
+ int (*test_1)(struct st_ops_args *args);
+ struct hlist_node node;
+ int id;
+};
+
#endif /* _BPF_TESTMOD_H */
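For context, a hedged sketch of what a BPF-side implementation of the new referenced-argument op might look like (section names follow the usual libbpf struct_ops conventions; the includes, map and program names are illustrative and not taken from this patch). The task__ref suffix on the kernel stub marks the argument as a referenced kptr, so the program is expected to release it:

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>
	#include "../test_kmods/bpf_testmod.h"

	void bpf_task_release(struct task_struct *p) __ksym;

	char _license[] SEC("license") = "GPL";

	SEC("struct_ops/test_refcounted")
	int BPF_PROG(test_refcounted, int dummy, struct task_struct *task)
	{
		/* the verifier tracks the acquired reference; release it here */
		bpf_task_release(task);
		return 0;
	}

	SEC(".struct_ops.link")
	struct bpf_testmod_ops testmod_refcounted = {
		.test_refcounted = (void *)test_refcounted,
	};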
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
index b58817938deb..4df6fa6a92cb 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h
@@ -158,5 +158,9 @@ void bpf_kfunc_trusted_vma_test(struct vm_area_struct *ptr) __ksym;
void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym;
void bpf_kfunc_trusted_num_test(int *ptr) __ksym;
void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym;
+struct task_struct *bpf_kfunc_ret_rcu_test(void) __ksym;
+int *bpf_kfunc_ret_rcu_test_nostruct(int rdonly_buf_size) __ksym;
+
+int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym;
#endif /* _BPF_TESTMOD_KFUNC_H */
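Similarly, a hedged sketch of how the new KF_RCU_PROTECTED kfunc declared above is meant to be called from BPF (program and section names are illustrative): the call must sit inside an RCU read-side critical section, and the KF_RET_NULL result must be null-checked before use:

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include "../test_kmods/bpf_testmod_kfunc.h"

	void bpf_rcu_read_lock(void) __ksym;
	void bpf_rcu_read_unlock(void) __ksym;

	char _license[] SEC("license") = "GPL";

	SEC("tc")
	int use_ret_rcu_kfunc(struct __sk_buff *skb)
	{
		struct task_struct *t;

		bpf_rcu_read_lock();
		t = bpf_kfunc_ret_rcu_test();	/* KF_RET_NULL: may be NULL */
		if (t)
			bpf_printk("pid %d", t->pid);
		bpf_rcu_read_unlock();
		return 0;
	}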
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index 4694422aa76c..88e4aeab21b7 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -74,7 +74,7 @@ int main(int argc, char **argv)
/* Let's try detach it before it was ever attached */
ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
- if (ret != -1 || errno != ENOENT) {
+ if (ret != -ENOENT) {
printf("bpf_prog_detach2 not attached should fail: %m\n");
return 1;
}
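The fix above assumes the libbpf 1.0 error convention, where API calls return a negative errno directly rather than -1 with errno set; a short hedged sketch of that checking pattern (variable names as in the surrounding code):

	ret = bpf_prog_detach2(progfd, lircfd, BPF_LIRC_MODE2);
	if (ret && ret != -ENOENT)
		printf("unexpected detach error: %d\n", ret);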
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 3e9b009580d4..338c035c3688 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -2,7 +2,6 @@
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
#include <stdlib.h>
-#include <regex.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -20,10 +19,12 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX "comment:test_expect_not_msg="
#define TEST_TAG_EXPECT_XLATED_PFX "comment:test_expect_xlated="
#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV "comment:test_expect_not_msg_unpriv="
#define TEST_TAG_EXPECT_XLATED_PFX_UNPRIV "comment:test_expect_xlated_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
@@ -36,9 +37,16 @@
#define TEST_TAG_ARCH "comment:test_arch="
#define TEST_TAG_JITED_PFX "comment:test_jited="
#define TEST_TAG_JITED_PFX_UNPRIV "comment:test_jited_unpriv="
+#define TEST_TAG_CAPS_UNPRIV "comment:test_caps_unpriv="
+#define TEST_TAG_LOAD_MODE_PFX "comment:load_mode="
+#define TEST_TAG_EXPECT_STDERR_PFX "comment:test_expect_stderr="
+#define TEST_TAG_EXPECT_STDERR_PFX_UNPRIV "comment:test_expect_stderr_unpriv="
+#define TEST_TAG_EXPECT_STDOUT_PFX "comment:test_expect_stdout="
+#define TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV "comment:test_expect_stdout_unpriv="
+#define TEST_TAG_LINEAR_SIZE "comment:test_linear_size="
/* Warning: duplicated in bpf_misc.h */
-#define POINTER_VALUE 0xcafe4all
+#define POINTER_VALUE 0xbadcafe
#define TEST_DATA_LEN 64
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -54,16 +62,9 @@ enum mode {
UNPRIV = 2
};
-struct expect_msg {
- const char *substr; /* substring match */
- regex_t regex;
- bool is_regex;
- bool on_next_line;
-};
-
-struct expected_msgs {
- struct expect_msg *patterns;
- size_t cnt;
+enum load_mode {
+ JITED = 1 << 0,
+ NO_JITED = 1 << 1,
};
struct test_subspec {
@@ -72,8 +73,11 @@ struct test_subspec {
struct expected_msgs expect_msgs;
struct expected_msgs expect_xlated;
struct expected_msgs jited;
+ struct expected_msgs stderr;
+ struct expected_msgs stdout;
int retval;
bool execute;
+ __u64 caps;
};
struct test_spec {
@@ -85,6 +89,8 @@ struct test_spec {
int prog_flags;
int mode_mask;
int arch_mask;
+ int load_mask;
+ int linear_sz;
bool auxiliary;
bool valid;
};
@@ -130,6 +136,10 @@ static void free_test_spec(struct test_spec *spec)
free_msgs(&spec->unpriv.expect_xlated);
free_msgs(&spec->priv.jited);
free_msgs(&spec->unpriv.jited);
+ free_msgs(&spec->unpriv.stderr);
+ free_msgs(&spec->priv.stderr);
+ free_msgs(&spec->unpriv.stdout);
+ free_msgs(&spec->priv.stdout);
free(spec->priv.name);
free(spec->unpriv.name);
@@ -197,7 +207,8 @@ static int compile_regex(const char *pattern, regex_t *regex)
return 0;
}
-static int __push_msg(const char *pattern, bool on_next_line, struct expected_msgs *msgs)
+static int __push_msg(const char *pattern, bool on_next_line, bool negative,
+ struct expected_msgs *msgs)
{
struct expect_msg *msg;
void *tmp;
@@ -213,6 +224,7 @@ static int __push_msg(const char *pattern, bool on_next_line, struct expected_ms
msg = &msgs->patterns[msgs->cnt];
msg->on_next_line = on_next_line;
msg->substr = pattern;
+ msg->negative = negative;
msg->is_regex = false;
if (strstr(pattern, "{{")) {
err = compile_regex(pattern, &msg->regex);
@@ -231,16 +243,16 @@ static int clone_msgs(struct expected_msgs *from, struct expected_msgs *to)
for (i = 0; i < from->cnt; i++) {
msg = &from->patterns[i];
- err = __push_msg(msg->substr, msg->on_next_line, to);
+ err = __push_msg(msg->substr, msg->on_next_line, msg->negative, to);
if (err)
return err;
}
return 0;
}
-static int push_msg(const char *substr, struct expected_msgs *msgs)
+static int push_msg(const char *substr, bool negative, struct expected_msgs *msgs)
{
- return __push_msg(substr, false, msgs);
+ return __push_msg(substr, false, negative, msgs);
}
static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct expected_msgs *msgs)
@@ -251,7 +263,7 @@ static int push_disasm_msg(const char *regex_str, bool *on_next_line, struct exp
*on_next_line = false;
return 0;
}
- err = __push_msg(regex_str, *on_next_line, msgs);
+ err = __push_msg(regex_str, *on_next_line, false, msgs);
if (err)
return err;
*on_next_line = true;
@@ -276,22 +288,47 @@ static int parse_int(const char *str, int *val, const char *name)
return 0;
}
-static int parse_retval(const char *str, int *val, const char *name)
+static int parse_caps(const char *str, __u64 *val, const char *name)
{
- struct {
- char *name;
- int val;
- } named_values[] = {
- { "INT_MIN" , INT_MIN },
- { "POINTER_VALUE", POINTER_VALUE },
- { "TEST_DATA_LEN", TEST_DATA_LEN },
- };
- int i;
+ int cap_flag = 0;
+ char *token = NULL, *saveptr = NULL;
- for (i = 0; i < ARRAY_SIZE(named_values); ++i) {
- if (strcmp(str, named_values[i].name) != 0)
- continue;
- *val = named_values[i].val;
+ char *str_cpy = strdup(str);
+ if (str_cpy == NULL) {
+ PRINT_FAIL("Memory allocation failed\n");
+ return -EINVAL;
+ }
+
+ token = strtok_r(str_cpy, "|", &saveptr);
+ while (token != NULL) {
+ errno = 0;
+ if (!strncmp("CAP_", token, sizeof("CAP_") - 1)) {
+ PRINT_FAIL("define %s constant in bpf_misc.h, failed to parse caps\n", token);
+ free(str_cpy);
+ return -EINVAL;
+ }
+ cap_flag = strtol(token, NULL, 10);
+ if (!cap_flag || errno) {
+ PRINT_FAIL("failed to parse caps %s\n", name);
+ free(str_cpy);
+ return -EINVAL;
+ }
+ *val |= (1ULL << cap_flag);
+ token = strtok_r(NULL, "|", &saveptr);
+ }
+
+ free(str_cpy);
+ return 0;
+}
+
+static int parse_retval(const char *str, int *val, const char *name)
+{
+ /*
+ * INT_MIN is defined as (-INT_MAX -1), i.e. it doesn't expand to a
+ * single int and cannot be parsed with strtol, so we handle it
+ * separately here. In addition, it expands to different expressions in
+ * different compilers so we use a prefixed _INT_MIN instead.
+ */
+ if (strcmp(str, "_INT_MIN") == 0) {
+ *val = INT_MIN;
return 0;
}
@@ -340,6 +377,7 @@ enum arch {
ARCH_X86_64 = 0x2,
ARCH_ARM64 = 0x4,
ARCH_RISCV64 = 0x8,
+ ARCH_S390X = 0x10,
};
static int get_current_arch(void)
@@ -350,6 +388,8 @@ static int get_current_arch(void)
return ARCH_ARM64;
#elif defined(__riscv) && __riscv_xlen == 64
return ARCH_RISCV64;
+#elif defined(__s390x__)
+ return ARCH_S390X;
#endif
return ARCH_UNKNOWN;
}
@@ -370,9 +410,14 @@ static int parse_test_spec(struct test_loader *tester,
bool xlated_on_next_line = true;
bool unpriv_jit_on_next_line;
bool jit_on_next_line;
+ bool stderr_on_next_line = true;
+ bool unpriv_stderr_on_next_line = true;
+ bool stdout_on_next_line = true;
+ bool unpriv_stdout_on_next_line = true;
bool collect_jit = false;
int func_id, i, err = 0;
u32 arch_mask = 0;
+ u32 load_mask = 0;
struct btf *btf;
enum arch arch;
@@ -430,12 +475,22 @@ static int parse_test_spec(struct test_loader *tester,
spec->auxiliary = true;
spec->mode_mask |= UNPRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX))) {
- err = push_msg(msg, &spec->priv.expect_msgs);
+ err = push_msg(msg, false, &spec->priv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX))) {
+ err = push_msg(msg, true, &spec->priv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
} else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV))) {
- err = push_msg(msg, &spec->unpriv.expect_msgs);
+ err = push_msg(msg, false, &spec->unpriv.expect_msgs);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_NOT_MSG_PFX_UNPRIV))) {
+ err = push_msg(msg, true, &spec->unpriv.expect_msgs);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
@@ -530,8 +585,10 @@ static int parse_test_spec(struct test_loader *tester,
arch = ARCH_ARM64;
} else if (strcmp(val, "RISCV64") == 0) {
arch = ARCH_RISCV64;
+ } else if (strcmp(val, "s390x") == 0) {
+ arch = ARCH_S390X;
} else {
- PRINT_FAIL("bad arch spec: '%s'", val);
+ PRINT_FAIL("bad arch spec: '%s'\n", val);
err = -EINVAL;
goto cleanup;
}
@@ -541,10 +598,63 @@ static int parse_test_spec(struct test_loader *tester,
jit_on_next_line = true;
} else if (str_has_pfx(s, TEST_BTF_PATH)) {
spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
+ } else if (str_has_pfx(s, TEST_TAG_CAPS_UNPRIV)) {
+ val = s + sizeof(TEST_TAG_CAPS_UNPRIV) - 1;
+ err = parse_caps(val, &spec->unpriv.caps, "test caps");
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_LOAD_MODE_PFX)) {
+ val = s + sizeof(TEST_TAG_LOAD_MODE_PFX) - 1;
+ if (strcmp(val, "jited") == 0) {
+ load_mask = JITED;
+ } else if (strcmp(val, "no_jited") == 0) {
+ load_mask = NO_JITED;
+ } else {
+ PRINT_FAIL("bad load spec: '%s'", val);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX))) {
+ err = push_disasm_msg(msg, &stderr_on_next_line,
+ &spec->priv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDERR_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stderr_on_next_line,
+ &spec->unpriv.stderr);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX))) {
+ err = push_disasm_msg(msg, &stdout_on_next_line,
+ &spec->priv.stdout);
+ if (err)
+ goto cleanup;
+ } else if ((msg = skip_dynamic_pfx(s, TEST_TAG_EXPECT_STDOUT_PFX_UNPRIV))) {
+ err = push_disasm_msg(msg, &unpriv_stdout_on_next_line,
+ &spec->unpriv.stdout);
+ if (err)
+ goto cleanup;
+ } else if (str_has_pfx(s, TEST_TAG_LINEAR_SIZE)) {
+ switch (bpf_program__type(prog)) {
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_CGROUP_SKB:
+ val = s + sizeof(TEST_TAG_LINEAR_SIZE) - 1;
+ err = parse_int(val, &spec->linear_sz, "test linear size");
+ if (err)
+ goto cleanup;
+ break;
+ default:
+ PRINT_FAIL("__linear_size for unsupported program type");
+ err = -EINVAL;
+ goto cleanup;
+ }
}
}
spec->arch_mask = arch_mask ?: -1;
+ spec->load_mask = load_mask ?: (JITED | NO_JITED);
if (spec->mode_mask == 0)
spec->mode_mask = PRIV;
@@ -593,6 +703,10 @@ static int parse_test_spec(struct test_loader *tester,
clone_msgs(&spec->priv.expect_xlated, &spec->unpriv.expect_xlated);
if (spec->unpriv.jited.cnt == 0)
clone_msgs(&spec->priv.jited, &spec->unpriv.jited);
+ if (spec->unpriv.stderr.cnt == 0)
+ clone_msgs(&spec->priv.stderr, &spec->unpriv.stderr);
+ if (spec->unpriv.stdout.cnt == 0)
+ clone_msgs(&spec->priv.stdout, &spec->unpriv.stdout);
}
spec->valid = true;
@@ -654,44 +768,155 @@ static void emit_jited(const char *jited, bool force)
fprintf(stdout, "JITED:\n=============\n%s=============\n", jited);
}
-static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
- void (*emit_fn)(const char *buf, bool force))
+static void emit_stderr(const char *stderr, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDERR:\n=============\n%s=============\n", stderr);
+}
+
+static void emit_stdout(const char *bpf_stdout, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "STDOUT:\n=============\n%s=============\n", bpf_stdout);
+}
+
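+/*
+ * Look for @msg in *@log, either as a plain substring or as a compiled
+ * regex; on success return the start of the match and advance *@log past
+ * its end, otherwise return NULL and leave *@log untouched.
+ */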
+static const char *match_msg(struct expect_msg *msg, const char **log)
{
- const char *log = log_buf, *prev_match;
+ const char *match = NULL;
regmatch_t reg_match[1];
- int prev_match_line;
- int match_line;
- int i, j, err;
+ int err;
+
+ if (!msg->is_regex) {
+ match = strstr(*log, msg->substr);
+ if (match)
+ *log = match + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex, *log, 1, reg_match, 0);
+ if (err == 0) {
+ match = *log + reg_match[0].rm_so;
+ *log += reg_match[0].rm_eo;
+ }
+ }
+ return match;
+}
+
+static int count_lines(const char *start, const char *end)
+{
+ const char *tmp;
+ int n = 0;
+
+ for (tmp = start; tmp < end; ++tmp)
+ if (*tmp == '\n')
+ n++;
+ return n;
+}
+
+struct match {
+ const char *start;
+ const char *end;
+ int line;
+};
+
+/*
+ * Positive messages are matched sequentially; each message is searched for
+ * starting at the end of the previous match.
+ */
+static void match_positive_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *prev_match;
+ int i, line;
- prev_match_line = -1;
- match_line = 0;
prev_match = log;
+ line = 0;
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ const char *match = NULL;
+
+ if (msg->negative)
+ continue;
+
+ match = match_msg(msg, &log);
+ if (match) {
+ line += count_lines(prev_match, match);
+ matches[i].start = match;
+ matches[i].end = log;
+ matches[i].line = line;
+ prev_match = match;
+ }
+ }
+}
+
+/*
+ * Each negative message N located between positive messages P1 and P2
+ * is matched in the span P1.end .. P2.start. Consequently, negative messages
+ * are unordered within the span.
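+ *
+ * Illustration: for the pattern sequence P1, N1, N2, P2, N3 the negative
+ * patterns N1 and N2 must not appear between the matches of P1 and P2,
+ * and N3 must not appear anywhere after the match of P2.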
+ */
+static void match_negative_msgs(const char *log, struct expected_msgs *msgs, struct match *matches)
+{
+ const char *start = log, *end, *next, *match;
+ const char *log_end = log + strlen(log);
+ int i, j, next_positive;
+
for (i = 0; i < msgs->cnt; i++) {
struct expect_msg *msg = &msgs->patterns[i];
- const char *match = NULL, *pat_status;
- bool wrong_line = false;
-
- if (!msg->is_regex) {
- match = strstr(log, msg->substr);
- if (match)
- log = match + strlen(msg->substr);
- } else {
- err = regexec(&msg->regex, log, 1, reg_match, 0);
- if (err == 0) {
- match = log + reg_match[0].rm_so;
- log += reg_match[0].rm_eo;
+
+ /* positive message bumps span start */
+ if (!msg->negative) {
+ start = matches[i].end ?: start;
+ continue;
+ }
+
+ /* count stride of negative patterns and adjust span end */
+ end = log_end;
+ for (next_positive = i + 1; next_positive < msgs->cnt; next_positive++) {
+ if (!msgs->patterns[next_positive].negative) {
+ end = matches[next_positive].start;
+ break;
}
}
- if (match) {
- for (; prev_match < match; ++prev_match)
- if (*prev_match == '\n')
- ++match_line;
- wrong_line = msg->on_next_line && prev_match_line >= 0 &&
- prev_match_line + 1 != match_line;
+ /* try matching negative messages within identified span */
+ for (j = i; j < next_positive; j++) {
+ next = start;
+ match = match_msg(&msgs->patterns[j], &next);
+ if (match && next <= end) {
+ matches[j].start = match;
+ matches[j].end = next;
+ }
}
- if (!match || wrong_line) {
+ /* -1 to account for i++ */
+ i = next_positive - 1;
+ }
+}
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force))
+{
+ struct match matches[msgs->cnt];
+ struct match *prev_match = NULL;
+ int i, j;
+
+ memset(matches, 0, sizeof(*matches) * msgs->cnt);
+ match_positive_msgs(log_buf, msgs, matches);
+ match_negative_msgs(log_buf, msgs, matches);
+
+ for (i = 0; i < msgs->cnt; i++) {
+ struct expect_msg *msg = &msgs->patterns[i];
+ struct match *match = &matches[i];
+ const char *pat_status;
+ bool unexpected;
+ bool wrong_line;
+ bool no_match;
+
+ no_match = !msg->negative && !match->start;
+ wrong_line = !msg->negative &&
+ msg->on_next_line &&
+ prev_match && prev_match->line + 1 != match->line;
+ unexpected = msg->negative && match->start;
+ if (no_match || wrong_line || unexpected) {
PRINT_FAIL("expect_msg\n");
if (env.verbosity == VERBOSE_NONE)
emit_fn(log_buf, true /*force*/);
@@ -701,8 +926,10 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
pat_status = "MATCHED ";
else if (wrong_line)
pat_status = "WRONG LINE";
- else
+ else if (no_match)
pat_status = "EXPECTED ";
+ else
+ pat_status = "UNEXPECTED";
msg = &msgs->patterns[j];
fprintf(stderr, "%s %s: '%s'\n",
pat_status,
@@ -712,12 +939,13 @@ static void validate_msgs(char *log_buf, struct expected_msgs *msgs,
if (wrong_line) {
fprintf(stderr,
"expecting match at line %d, actual match is at line %d\n",
- prev_match_line + 1, match_line);
+ prev_match->line + 1, match->line);
}
break;
}
- prev_match_line = match_line;
+ if (!msg->negative)
+ prev_match = match;
}
}
@@ -734,7 +962,7 @@ static int drop_capabilities(struct cap_state *caps)
err = cap_disable_effective(caps_to_drop, &caps->old_caps);
if (err) {
- PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(-err));
return err;
}
@@ -751,7 +979,7 @@ static int restore_capabilities(struct cap_state *caps)
err = cap_enable_effective(caps->old_caps, NULL);
if (err)
- PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err));
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(-err));
caps->initialized = false;
return err;
}
@@ -796,10 +1024,11 @@ static bool is_unpriv_capable_map(struct bpf_map *map)
}
}
-static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts, int linear_sz)
{
__u8 tmp_out[TEST_DATA_LEN << 2] = {};
__u8 tmp_in[TEST_DATA_LEN] = {};
+ struct __sk_buff ctx = {};
int err, saved_errno;
LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = tmp_in,
@@ -809,6 +1038,12 @@ static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
.repeat = 1,
);
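+
+ /*
+ * Tests tagged with __linear_size pass a __sk_buff context with
+ * data_end set to linear_sz; this is meant to cap how much of the
+ * test data ends up in the skb's linear area.
+ */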
+ if (linear_sz) {
+ ctx.data_end = linear_sz;
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
+ }
+
if (empty_opts) {
memset(&topts, 0, sizeof(struct bpf_test_run_opts));
topts.sz = sizeof(struct bpf_test_run_opts);
@@ -876,6 +1111,19 @@ out:
return err;
}
+/* Read the BPF program stream for the given stream_id (1 = stdout, 2 = stderr) */
+static int get_stream(int stream_id, int prog_fd, char *text, size_t text_sz)
+{
+ LIBBPF_OPTS(bpf_prog_stream_read_opts, ropts);
+ int ret;
+
+ ret = bpf_prog_stream_read(prog_fd, stream_id, text, text_sz, &ropts);
+ ASSERT_GT(ret, 0, "stream read");
+ text[ret] = '\0';
+
+ return ret;
+}
+
/* this function is forced noinline and has short generic name to look better
* in test_progs output (in case of a failure)
*/
@@ -889,6 +1137,7 @@ void run_subtest(struct test_loader *tester,
bool unpriv)
{
struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ int current_runtime = is_jit_enabled() ? JITED : NO_JITED;
struct bpf_program *tprog = NULL, *tprog_iter;
struct bpf_link *link, *links[32] = {};
struct test_spec *spec_iter;
@@ -907,6 +1156,11 @@ void run_subtest(struct test_loader *tester,
return;
}
+ if ((current_runtime & spec->load_mask) == 0) {
+ test__skip();
+ return;
+ }
+
if (unpriv) {
if (!can_execute_unpriv(tester, spec)) {
test__skip();
@@ -917,6 +1171,13 @@ void run_subtest(struct test_loader *tester,
test__end_subtest();
return;
}
+ if (subspec->caps) {
+ err = cap_enable_effective(subspec->caps, NULL);
+ if (err) {
+ PRINT_FAIL("failed to set capabilities: %i, %s\n", err, strerror(-err));
+ goto subtest_cleanup;
+ }
+ }
}
/* Implicitly reset to NULL if next test case doesn't specify */
@@ -970,6 +1231,14 @@ void run_subtest(struct test_loader *tester,
emit_verifier_log(tester->log_buf, false /*force*/);
validate_msgs(tester->log_buf, &subspec->expect_msgs, emit_verifier_log);
+ /* Restore capabilities because the kernel will silently ignore requests
+ * for program info (such as xlated program text) if we are not
+ * bpf-capable. Also, for some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
if (subspec->expect_xlated.cnt) {
err = get_xlated_program_text(bpf_program__fd(tprog),
tester->log_buf, tester->log_buf_sz);
@@ -995,12 +1264,6 @@ void run_subtest(struct test_loader *tester,
}
if (should_do_test_run(spec, subspec)) {
- /* For some reason test_verifier executes programs
- * with all capabilities restored. Do the same here.
- */
- if (restore_capabilities(&caps))
- goto tobj_cleanup;
-
/* Do bpf_map__attach_struct_ops() for each struct_ops map.
* This should trigger bpf_struct_ops->reg callback on kernel side.
*/
@@ -1015,7 +1278,7 @@ void run_subtest(struct test_loader *tester,
link = bpf_map__attach_struct_ops(map);
if (!link) {
PRINT_FAIL("bpf_map__attach_struct_ops failed for map %s: err=%d\n",
- bpf_map__name(map), err);
+ bpf_map__name(map), -errno);
goto tobj_cleanup;
}
links[links_cnt++] = link;
@@ -1029,12 +1292,38 @@ void run_subtest(struct test_loader *tester,
}
}
- do_prog_test_run(bpf_program__fd(tprog), &retval,
- bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
- if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ err = do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false,
+ spec->linear_sz);
+ if (!err && retval != subspec->retval && subspec->retval != POINTER_VALUE) {
PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
goto tobj_cleanup;
}
+
+ if (subspec->stderr.cnt) {
+ err = get_stream(2, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stderr(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stderr, emit_stderr);
+ }
+
+ if (subspec->stdout.cnt) {
+ err = get_stream(1, bpf_program__fd(tprog),
+ tester->log_buf, tester->log_buf_sz);
+ if (err <= 0) {
+ PRINT_FAIL("Unexpected retval from get_stream(): %d, errno = %d\n",
+ err, errno);
+ goto tobj_cleanup;
+ }
+ emit_stdout(tester->log_buf, false /*force*/);
+ validate_msgs(tester->log_buf, &subspec->stdout, emit_stdout);
+ }
+
/* redo bpf_map__attach_struct_ops for each test */
while (links_cnt > 0)
bpf_link__destroy(links[--links_cnt]);
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index fda7589c5023..0921939532c6 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -138,6 +138,18 @@ static int sched_next_online(int pid, int *next_to_try)
return ret;
}
+/* Derive target_free from map_size, same as bpf_common_lru_populate */
+static unsigned int __tgt_size(unsigned int map_size)
+{
+ return (map_size / nr_cpus) / 2;
+}
+
+/* Inverse of how bpf_common_lru_populate derives target_free from map_size. */
+static unsigned int __map_size(unsigned int tgt_free)
+{
+ return tgt_free * nr_cpus * 2;
+}
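+
+/*
+ * Illustration: with nr_cpus == 4, __map_size(128) == 1024 and
+ * __tgt_size(1024) == 128, so the two helpers invert each other.
+ */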
+
/* Size of the LRU map is 2
* Add key=1 (+1 key)
* Add key=2 (+1 key)
@@ -231,11 +243,11 @@ static void test_lru_sanity0(int map_type, int map_flags)
printf("Pass\n");
}
-/* Size of the LRU map is 1.5*tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Lookup 1 to tgt_free/2
- * Insert 1+tgt_free to 2*tgt_free (+tgt_free keys)
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be removed by LRU
+/* Verify that unreferenced elements are recycled before referenced ones.
+ * Insert elements.
+ * Reference a subset of these.
+ * Insert more, enough to trigger recycling.
+ * Verify that unreferenced are recycled.
*/
static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -257,7 +269,7 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -266,13 +278,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup 1 to tgt_free/2 */
+ /* Lookup 1 to batch_size */
end_key = 1 + batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -280,12 +292,13 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
BPF_NOEXIST));
}
- /* Insert 1+tgt_free to 2*tgt_free
- * => 1+tgt_free/2 to LOCALFREE_TARGET will be
+ /* Insert another map_size - batch_size keys
+ * Map will contain 1 to batch_size plus these latest, i.e.,
+ * => previous 1+batch_size to map_size - batch_size will have been
* removed by LRU
*/
- key = 1 + tgt_free;
- end_key = key + tgt_free;
+ key = 1 + __map_size(tgt_free);
+ end_key = key + __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -301,17 +314,8 @@ static void test_lru_sanity1(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map 1.5 * tgt_free
- * Insert 1 to tgt_free (+tgt_free keys)
- * Update 1 to tgt_free/2
- * => The original 1 to tgt_free/2 will be removed due to
- * the LRU shrink process
- * Re-insert 1 to tgt_free/2 again and do a lookup immeidately
- * Insert 1+tgt_free to tgt_free*3/2
- * Insert 1+tgt_free*3/2 to tgt_free*5/2
- * => Key 1+tgt_free to tgt_free*3/2
- * will be removed from LRU because it has never
- * been lookup and ref bit is not set
+/* Verify that insertions exceeding map size will recycle the oldest.
+ * Verify that unreferenced elements are recycled before referenced.
*/
static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -334,7 +338,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
batch_size = tgt_free / 2;
assert(batch_size * 2 == tgt_free);
- map_size = tgt_free + batch_size;
+ map_size = __map_size(tgt_free) + batch_size;
lru_map_fd = create_map(map_type, map_flags, map_size);
assert(lru_map_fd != -1);
@@ -343,8 +347,8 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to tgt_free (+tgt_free keys) */
- end_key = 1 + tgt_free;
+ /* Insert map_size - batch_size keys */
+ end_key = 1 + __map_size(tgt_free);
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -357,8 +361,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
* shrink the inactive list to get tgt_free
* number of free nodes.
*
- * Hence, the oldest key 1 to tgt_free/2
- * are removed from the LRU list.
+ * Hence, the oldest key is removed from the LRU list.
*/
key = 1;
if (map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
@@ -370,8 +373,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
BPF_EXIST));
}
- /* Re-insert 1 to tgt_free/2 again and do a lookup
- * immeidately.
+ /* Re-insert 1 to batch_size again and do a lookup immediately.
*/
end_key = 1 + batch_size;
value[0] = 4321;
@@ -387,17 +389,18 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1+tgt_free to tgt_free*3/2 */
- end_key = 1 + tgt_free + batch_size;
- for (key = 1 + tgt_free; key < end_key; key++)
+ /* Insert batch_size new elements */
+ key = 1 + __map_size(tgt_free);
+ end_key = key + batch_size;
+ for (; key < end_key; key++)
/* These newly added but not referenced keys will be
* gone during the next LRU shrink.
*/
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Insert 1+tgt_free*3/2 to tgt_free*5/2 */
- end_key = key + tgt_free;
+ /* Insert map_size - batch_size elements */
+ end_key += __map_size(tgt_free);
for (; key < end_key; key++) {
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
@@ -413,12 +416,12 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
printf("Pass\n");
}
-/* Size of the LRU map is 2*tgt_free
- * It is to test the active/inactive list rotation
- * Insert 1 to 2*tgt_free (+2*tgt_free keys)
- * Lookup key 1 to tgt_free*3/2
- * Add 1+2*tgt_free to tgt_free*5/2 (+tgt_free/2 keys)
- * => key 1+tgt_free*3/2 to 2*tgt_free are removed from LRU
+/* Test the active/inactive list rotation
+ *
+ * Fill the whole map, deplete the free list.
+ * Reference all except the last lru->target_free elements.
+ * Insert lru->target_free new elements. This triggers one shrink.
+ * Verify that the non-referenced elements are replaced.
*/
static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
{
@@ -437,8 +440,7 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
assert(sched_next_online(0, &next_cpu) != -1);
- batch_size = tgt_free / 2;
- assert(batch_size * 2 == tgt_free);
+ batch_size = __tgt_size(tgt_free);
map_size = tgt_free * 2;
lru_map_fd = create_map(map_type, map_flags, map_size);
@@ -449,23 +451,21 @@ static void test_lru_sanity3(int map_type, int map_flags, unsigned int tgt_free)
value[0] = 1234;
- /* Insert 1 to 2*tgt_free (+2*tgt_free keys) */
- end_key = 1 + (2 * tgt_free);
+ /* Fill the map */
+ end_key = 1 + map_size;
for (key = 1; key < end_key; key++)
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
- /* Lookup key 1 to tgt_free*3/2 */
- end_key = tgt_free + batch_size;
+ /* Reference all but the last batch_size */
+ end_key = 1 + map_size - batch_size;
for (key = 1; key < end_key; key++) {
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
BPF_NOEXIST));
}
- /* Add 1+2*tgt_free to tgt_free*5/2
- * (+tgt_free/2 keys)
- */
+ /* Insert batch_size new elements: these replace the non-referenced ones */
key = 2 * tgt_free + 1;
end_key = key + batch_size;
for (; key < end_key; key++) {
@@ -500,7 +500,8 @@ static void test_lru_sanity4(int map_type, int map_flags, unsigned int tgt_free)
lru_map_fd = create_map(map_type, map_flags,
3 * tgt_free * nr_cpus);
else
- lru_map_fd = create_map(map_type, map_flags, 3 * tgt_free);
+ lru_map_fd = create_map(map_type, map_flags,
+ 3 * __map_size(tgt_free));
assert(lru_map_fd != -1);
expected_map_fd = create_map(BPF_MAP_TYPE_HASH, 0,
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
deleted file mode 100755
index 1e565f47aca9..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ /dev/null
@@ -1,476 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Setup/topology:
-#
-# NS1 NS2 NS3
-# veth1 <---> veth2 veth3 <---> veth4 (the top route)
-# veth5 <---> veth6 veth7 <---> veth8 (the bottom route)
-#
-# each vethN gets IPv[4|6]_N address
-#
-# IPv*_SRC = IPv*_1
-# IPv*_DST = IPv*_4
-#
-# all tests test pings from IPv*_SRC to IPv*_DST
-#
-# by default, routes are configured to allow packets to go
-# IP*_1 <=> IP*_2 <=> IP*_3 <=> IP*_4 (the top route)
-#
-# a GRE device is installed in NS3 with IPv*_GRE, and
-# NS1/NS2 are configured to route packets to IPv*_GRE via IP*_8
-# (the bottom route)
-#
-# Tests:
-#
-# 1. routes NS2->IPv*_DST are brought down, so the only way a ping
-# from IP*_SRC to IP*_DST can work is via IPv*_GRE
-#
-# 2a. in an egress test, a bpf LWT_XMIT program is installed on veth1
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth1:egress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-#
-# 2b. in an ingress test, a bpf LWT_IN program is installed on veth2
-# that encaps the packets with an IP/GRE header to route to IPv*_GRE
-#
-# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
-# ping replies go DST->SRC directly
-
-BPF_FILE="test_lwt_ip_encap.bpf.o"
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-
-readonly IPv4_1="172.16.1.100"
-readonly IPv4_2="172.16.2.100"
-readonly IPv4_3="172.16.3.100"
-readonly IPv4_4="172.16.4.100"
-readonly IPv4_5="172.16.5.100"
-readonly IPv4_6="172.16.6.100"
-readonly IPv4_7="172.16.7.100"
-readonly IPv4_8="172.16.8.100"
-readonly IPv4_GRE="172.16.16.100"
-
-readonly IPv4_SRC=$IPv4_1
-readonly IPv4_DST=$IPv4_4
-
-readonly IPv6_1="fb01::1"
-readonly IPv6_2="fb02::1"
-readonly IPv6_3="fb03::1"
-readonly IPv6_4="fb04::1"
-readonly IPv6_5="fb05::1"
-readonly IPv6_6="fb06::1"
-readonly IPv6_7="fb07::1"
-readonly IPv6_8="fb08::1"
-readonly IPv6_GRE="fb10::1"
-
-readonly IPv6_SRC=$IPv6_1
-readonly IPv6_DST=$IPv6_4
-
-TEST_STATUS=0
-TESTS_SUCCEEDED=0
-TESTS_FAILED=0
-
-TMPFILE=""
-
-process_test_results()
-{
- if [[ "${TEST_STATUS}" -eq 0 ]] ; then
- echo "PASS"
- TESTS_SUCCEEDED=$((TESTS_SUCCEEDED+1))
- else
- echo "FAIL"
- TESTS_FAILED=$((TESTS_FAILED+1))
- fi
-}
-
-print_test_summary_and_exit()
-{
- echo "passed tests: ${TESTS_SUCCEEDED}"
- echo "failed tests: ${TESTS_FAILED}"
- if [ "${TESTS_FAILED}" -eq "0" ] ; then
- exit 0
- else
- exit 1
- fi
-}
-
-setup()
-{
- set -e # exit on error
- TEST_STATUS=0
-
- # create devices and namespaces
- ip netns add "${NS1}"
- ip netns add "${NS2}"
- ip netns add "${NS3}"
-
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
-
- # disable IPv6 DAD because it sometimes takes too long and fails tests
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
- ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
-
- ip link add veth1 type veth peer name veth2
- ip link add veth3 type veth peer name veth4
- ip link add veth5 type veth peer name veth6
- ip link add veth7 type veth peer name veth8
-
- ip netns exec ${NS2} sysctl -wq net.ipv4.ip_forward=1
- ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.forwarding=1
-
- ip link set veth1 netns ${NS1}
- ip link set veth2 netns ${NS2}
- ip link set veth3 netns ${NS2}
- ip link set veth4 netns ${NS3}
- ip link set veth5 netns ${NS1}
- ip link set veth6 netns ${NS2}
- ip link set veth7 netns ${NS2}
- ip link set veth8 netns ${NS3}
-
- if [ ! -z "${VRF}" ] ; then
- ip -netns ${NS1} link add red type vrf table 1001
- ip -netns ${NS1} link set red up
- ip -netns ${NS1} route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS1} link set veth1 vrf red
- ip -netns ${NS1} link set veth5 vrf red
-
- ip -netns ${NS2} link add red type vrf table 1001
- ip -netns ${NS2} link set red up
- ip -netns ${NS2} route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} -6 route add table 1001 unreachable default metric 8192
- ip -netns ${NS2} link set veth2 vrf red
- ip -netns ${NS2} link set veth3 vrf red
- ip -netns ${NS2} link set veth6 vrf red
- ip -netns ${NS2} link set veth7 vrf red
- fi
-
- # configure addesses: the top route (1-2-3-4)
- ip -netns ${NS1} addr add ${IPv4_1}/24 dev veth1
- ip -netns ${NS2} addr add ${IPv4_2}/24 dev veth2
- ip -netns ${NS2} addr add ${IPv4_3}/24 dev veth3
- ip -netns ${NS3} addr add ${IPv4_4}/24 dev veth4
- ip -netns ${NS1} -6 addr add ${IPv6_1}/128 nodad dev veth1
- ip -netns ${NS2} -6 addr add ${IPv6_2}/128 nodad dev veth2
- ip -netns ${NS2} -6 addr add ${IPv6_3}/128 nodad dev veth3
- ip -netns ${NS3} -6 addr add ${IPv6_4}/128 nodad dev veth4
-
- # configure addresses: the bottom route (5-6-7-8)
- ip -netns ${NS1} addr add ${IPv4_5}/24 dev veth5
- ip -netns ${NS2} addr add ${IPv4_6}/24 dev veth6
- ip -netns ${NS2} addr add ${IPv4_7}/24 dev veth7
- ip -netns ${NS3} addr add ${IPv4_8}/24 dev veth8
- ip -netns ${NS1} -6 addr add ${IPv6_5}/128 nodad dev veth5
- ip -netns ${NS2} -6 addr add ${IPv6_6}/128 nodad dev veth6
- ip -netns ${NS2} -6 addr add ${IPv6_7}/128 nodad dev veth7
- ip -netns ${NS3} -6 addr add ${IPv6_8}/128 nodad dev veth8
-
- ip -netns ${NS1} link set dev veth1 up
- ip -netns ${NS2} link set dev veth2 up
- ip -netns ${NS2} link set dev veth3 up
- ip -netns ${NS3} link set dev veth4 up
- ip -netns ${NS1} link set dev veth5 up
- ip -netns ${NS2} link set dev veth6 up
- ip -netns ${NS2} link set dev veth7 up
- ip -netns ${NS3} link set dev veth8 up
-
- # configure routes: IP*_SRC -> veth1/IP*_2 (= top route) default;
- # the bottom route to specific bottom addresses
-
- # NS1
- # top route
- ip -netns ${NS1} route add ${IPv4_2}/32 dev veth1 ${VRF}
- ip -netns ${NS1} route add default dev veth1 via ${IPv4_2} ${VRF} # go top by default
- ip -netns ${NS1} -6 route add ${IPv6_2}/128 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add default dev veth1 via ${IPv6_2} ${VRF} # go top by default
- # bottom route
- ip -netns ${NS1} route add ${IPv4_6}/32 dev veth5 ${VRF}
- ip -netns ${NS1} route add ${IPv4_7}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} route add ${IPv4_8}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_6}/128 dev veth5 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_7}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_8}/128 dev veth5 via ${IPv6_6} ${VRF}
-
- # NS2
- # top route
- ip -netns ${NS2} route add ${IPv4_1}/32 dev veth2 ${VRF}
- ip -netns ${NS2} route add ${IPv4_4}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_1}/128 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_4}/128 dev veth3 ${VRF}
- # bottom route
- ip -netns ${NS2} route add ${IPv4_5}/32 dev veth6 ${VRF}
- ip -netns ${NS2} route add ${IPv4_8}/32 dev veth7 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_5}/128 dev veth6 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_8}/128 dev veth7 ${VRF}
-
- # NS3
- # top route
- ip -netns ${NS3} route add ${IPv4_3}/32 dev veth4
- ip -netns ${NS3} route add ${IPv4_1}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} route add ${IPv4_2}/32 dev veth4 via ${IPv4_3}
- ip -netns ${NS3} -6 route add ${IPv6_3}/128 dev veth4
- ip -netns ${NS3} -6 route add ${IPv6_1}/128 dev veth4 via ${IPv6_3}
- ip -netns ${NS3} -6 route add ${IPv6_2}/128 dev veth4 via ${IPv6_3}
- # bottom route
- ip -netns ${NS3} route add ${IPv4_7}/32 dev veth8
- ip -netns ${NS3} route add ${IPv4_5}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} route add ${IPv4_6}/32 dev veth8 via ${IPv4_7}
- ip -netns ${NS3} -6 route add ${IPv6_7}/128 dev veth8
- ip -netns ${NS3} -6 route add ${IPv6_5}/128 dev veth8 via ${IPv6_7}
- ip -netns ${NS3} -6 route add ${IPv6_6}/128 dev veth8 via ${IPv6_7}
-
- # configure IPv4 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} tunnel add gre_dev mode gre remote ${IPv4_1} local ${IPv4_GRE} ttl 255
- ip -netns ${NS3} link set gre_dev up
- ip -netns ${NS3} addr add ${IPv4_GRE} dev gre_dev
- ip -netns ${NS1} route add ${IPv4_GRE}/32 dev veth5 via ${IPv4_6} ${VRF}
- ip -netns ${NS2} route add ${IPv4_GRE}/32 dev veth7 via ${IPv4_8} ${VRF}
-
-
- # configure IPv6 GRE device in NS3, and a route to it via the "bottom" route
- ip -netns ${NS3} -6 tunnel add name gre6_dev mode ip6gre remote ${IPv6_1} local ${IPv6_GRE} ttl 255
- ip -netns ${NS3} link set gre6_dev up
- ip -netns ${NS3} -6 addr add ${IPv6_GRE} nodad dev gre6_dev
- ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
-
- TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
-
- sleep 1 # reduce flakiness
- set +e
-}
-
-cleanup()
-{
- if [ -f ${TMPFILE} ] ; then
- rm ${TMPFILE}
- fi
-
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
-}
-
-trap cleanup EXIT
-
-remove_routes_to_gredev()
-{
- ip -netns ${NS1} route del ${IPv4_GRE} dev veth5 ${VRF}
- ip -netns ${NS2} route del ${IPv4_GRE} dev veth7 ${VRF}
- ip -netns ${NS1} -6 route del ${IPv6_GRE}/128 dev veth5 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_GRE}/128 dev veth7 ${VRF}
-}
-
-add_unreachable_routes_to_gredev()
-{
- ip -netns ${NS1} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS2} route add unreachable ${IPv4_GRE}/32 ${VRF}
- ip -netns ${NS1} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
- ip -netns ${NS2} -6 route add unreachable ${IPv6_GRE}/128 ${VRF}
-}
-
-test_ping()
-{
- local readonly PROTO=$1
- local readonly EXPECTED=$2
- local RET=0
-
- if [ "${PROTO}" == "IPv4" ] ; then
- ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
- RET=$?
- elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
- RET=$?
- else
- echo " test_ping: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
-
- if [ "0" != "${RET}" ]; then
- RET=1
- fi
-
- if [ "${EXPECTED}" != "${RET}" ] ; then
- echo " test_ping failed: expected: ${EXPECTED}; got ${RET}"
- TEST_STATUS=1
- fi
-}
-
-test_gso()
-{
- local readonly PROTO=$1
- local readonly PKT_SZ=5000
- local IP_DST=""
- : > ${TMPFILE} # trim the capture file
-
- # check that nc is present
- command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available: skipping TSO tests"; return; }
-
- # listen on port 9000, capture TCP into $TMPFILE
- if [ "${PROTO}" == "IPv4" ] ; then
- IP_DST=${IPv4_DST}
- ip netns exec ${NS3} bash -c \
- "nc -4 -l -p 9000 > ${TMPFILE} &"
- elif [ "${PROTO}" == "IPv6" ] ; then
- IP_DST=${IPv6_DST}
- ip netns exec ${NS3} bash -c \
- "nc -6 -l -p 9000 > ${TMPFILE} &"
- RET=$?
- else
- echo " test_gso: unknown PROTO: ${PROTO}"
- TEST_STATUS=1
- fi
- sleep 1 # let nc start listening
-
- # send a packet larger than MTU
- ip netns exec ${NS1} bash -c \
- "dd if=/dev/zero bs=$PKT_SZ count=1 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
- sleep 2 # let the packet get delivered
-
- # verify we received all expected bytes
- SZ=$(stat -c %s ${TMPFILE})
- if [ "$SZ" != "$PKT_SZ" ] ; then
- echo " test_gso failed: ${PROTO}"
- TEST_STATUS=1
- fi
-}
-
-test_egress()
-{
- local readonly ENCAP=$1
- echo "starting egress ${ENCAP} encap test ${VRF}"
- setup
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, ping fails
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
- else
- echo " unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # skip GSO tests with VRF: VRF routing needs properly assigned
- # source IP/device, which is easy to do with ping and hard with dd/nc.
- if [ -z "${VRF}" ] ; then
- test_gso IPv4
- test_gso IPv6
- fi
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-test_ingress()
-{
- local readonly ENCAP=$1
- echo "starting ingress ${ENCAP} encap test ${VRF}"
- setup
-
- # need to wait a bit for IPv6 to autoconf, otherwise
- # ping6 sometimes fails with "unable to bind to address"
-
- # by default, pings work
- test_ping IPv4 0
- test_ping IPv6 0
-
- # remove NS2->DST routes, pings fail
- ip -netns ${NS2} route del ${IPv4_DST}/32 dev veth3 ${VRF}
- ip -netns ${NS2} -6 route del ${IPv6_DST}/128 dev veth3 ${VRF}
- test_ping IPv4 1
- test_ping IPv6 1
-
- # install replacement routes (LWT/eBPF), pings succeed
- if [ "${ENCAP}" == "IPv4" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
- elif [ "${ENCAP}" == "IPv6" ] ; then
- ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
- else
- echo "FAIL: unknown encap ${ENCAP}"
- TEST_STATUS=1
- fi
- test_ping IPv4 0
- test_ping IPv6 0
-
- # a negative test: remove routes to GRE devices: ping fails
- remove_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- # another negative test
- add_unreachable_routes_to_gredev
- test_ping IPv4 1
- test_ping IPv6 1
-
- cleanup
- process_test_results
-}
-
-VRF=""
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-VRF="vrf red"
-test_egress IPv4
-test_egress IPv6
-test_ingress IPv4
-test_ingress IPv6
-
-print_test_summary_and_exit
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
deleted file mode 100755
index 0efea2292d6a..000000000000
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/bin/bash
-# Connects 6 network namespaces through veths.
-# Each NS may have different IPv6 global scope addresses :
-# NS1 ---- NS2 ---- NS3 ---- NS4 ---- NS5 ---- NS6
-# fb00::1 fd00::1 fd00::2 fd00::3 fb00::6
-# fc42::1 fd00::4
-#
-# All IPv6 packets going to fb00::/16 through NS2 will be encapsulated in a
-# IPv6 header with a Segment Routing Header, with segments :
-# fd00::1 -> fd00::2 -> fd00::3 -> fd00::4
-#
-# 3 fd00::/16 IPv6 addresses are binded to seg6local End.BPF actions :
-# - fd00::1 : add a TLV, change the flags and apply a End.X action to fc42::1
-# - fd00::2 : remove the TLV, change the flags, add a tag
-# - fd00::3 : apply an End.T action to fd00::4, through routing table 117
-#
-# fd00::4 is a simple Segment Routing node decapsulating the inner IPv6 packet.
-# Each End.BPF action will validate the operations applied on the SRH by the
-# previous BPF program in the chain, otherwise the packet is dropped.
-#
-# An UDP datagram is sent from fb00::1 to fb00::6. The test succeeds if this
-# datagram can be read on NS6 when binding to fb00::6.
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-BPF_FILE="test_lwt_seg6local.bpf.o"
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-readonly NS3="ns3-$(mktemp -u XXXXXX)"
-readonly NS4="ns4-$(mktemp -u XXXXXX)"
-readonly NS5="ns5-$(mktemp -u XXXXXX)"
-readonly NS6="ns6-$(mktemp -u XXXXXX)"
-
-msg="skip all tests:"
-if [ $UID != 0 ]; then
- echo $msg please run this as root >&2
- exit $ksft_skip
-fi
-
-TMP_FILE="/tmp/selftest_lwt_seg6local.txt"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_lwt_seg6local [PASS]";
- else
- echo "selftests: test_lwt_seg6local [FAILED]";
- fi
-
- set +e
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
- ip netns del ${NS3} 2> /dev/null
- ip netns del ${NS4} 2> /dev/null
- ip netns del ${NS5} 2> /dev/null
- ip netns del ${NS6} 2> /dev/null
- rm -f $TMP_FILE
-}
-
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-ip netns add ${NS3}
-ip netns add ${NS4}
-ip netns add ${NS5}
-ip netns add ${NS6}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-ip link add veth3 type veth peer name veth4
-ip link add veth5 type veth peer name veth6
-ip link add veth7 type veth peer name veth8
-ip link add veth9 type veth peer name veth10
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-ip link set veth3 netns ${NS2}
-ip link set veth4 netns ${NS3}
-ip link set veth5 netns ${NS3}
-ip link set veth6 netns ${NS4}
-ip link set veth7 netns ${NS4}
-ip link set veth8 netns ${NS5}
-ip link set veth9 netns ${NS5}
-ip link set veth10 netns ${NS6}
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-ip netns exec ${NS2} ip link set dev veth3 up
-ip netns exec ${NS3} ip link set dev veth4 up
-ip netns exec ${NS3} ip link set dev veth5 up
-ip netns exec ${NS4} ip link set dev veth6 up
-ip netns exec ${NS4} ip link set dev veth7 up
-ip netns exec ${NS5} ip link set dev veth8 up
-ip netns exec ${NS5} ip link set dev veth9 up
-ip netns exec ${NS6} ip link set dev veth10 up
-ip netns exec ${NS6} ip link set dev lo up
-
-# All link scope addresses and routes required between veths
-ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
-ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2
-ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4
-
-ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6
-ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
-ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8
-
-ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
-ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
-sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -TERM $!
-
-if [[ $(< $TMP_FILE) != "foobar" ]]; then
- exit 1
-fi
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 8b40e9496af1..ccc5acd55ff9 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -535,7 +535,7 @@ static void test_devmap_hash(unsigned int task, void *data)
static void test_queuemap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -591,7 +591,7 @@ static void test_queuemap(unsigned int task, void *data)
static void test_stackmap(unsigned int task, void *data)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE + MAP_SIZE/2], val;
+ __u32 vals[MAP_SIZE + MAP_SIZE/2], val = 0;
int fd, i;
/* Fill test values to be used */
@@ -1396,9 +1396,11 @@ static void test_map_stress(void)
#define MAX_DELAY_US 50000
#define MIN_DELAY_RANGE_US 5000
-static bool retry_for_again_or_busy(int err)
+static bool can_retry(int err)
{
- return (err == EAGAIN || err == EBUSY);
+ return (err == EAGAIN || err == EBUSY ||
+ ((err == ENOMEM || err == E2BIG) &&
+ map_opts.map_flags == BPF_F_NO_PREALLOC));
}
int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
@@ -1451,12 +1453,12 @@ static void test_update_delete(unsigned int fn, void *data)
if (do_update) {
err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
- retry_for_again_or_busy);
+ can_retry);
if (err)
printf("error %d %d\n", err, errno);
assert(err == 0);
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 6088d8222d59..02a85dda30e6 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -14,12 +14,14 @@
#include <netinet/in.h>
#include <sys/select.h>
#include <sys/socket.h>
+#include <linux/keyctl.h>
#include <sys/un.h>
#include <bpf/btf.h>
#include <time.h>
#include "json_writer.h"
#include "network_helpers.h"
+#include "verification_cert.h"
/* backtrace() and backtrace_symbols_fd() are glibc specific,
* use header file when glibc is available and provide stub
@@ -88,7 +90,9 @@ static void stdio_hijack(char **log_buf, size_t *log_cnt)
#endif
}
-static void stdio_restore_cleanup(void)
+static pthread_mutex_t stdout_lock = PTHREAD_MUTEX_INITIALIZER;
+
+static void stdio_restore(void)
{
#ifdef __GLIBC__
if (verbose() && env.worker_id == -1) {
@@ -98,34 +102,33 @@ static void stdio_restore_cleanup(void)
fflush(stdout);
+ pthread_mutex_lock(&stdout_lock);
+
if (env.subtest_state) {
- fclose(env.subtest_state->stdout_saved);
+ if (env.subtest_state->stdout_saved)
+ fclose(env.subtest_state->stdout_saved);
env.subtest_state->stdout_saved = NULL;
stdout = env.test_state->stdout_saved;
stderr = env.test_state->stdout_saved;
} else {
- fclose(env.test_state->stdout_saved);
+ if (env.test_state->stdout_saved)
+ fclose(env.test_state->stdout_saved);
env.test_state->stdout_saved = NULL;
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
}
+
+ pthread_mutex_unlock(&stdout_lock);
#endif
}
-static void stdio_restore(void)
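+/*
+ * Print callback handed to the traffic monitor; takes stdout_lock so its
+ * output does not interleave with stdio_restore() redirections.
+ */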
+static int traffic_monitor_print_fn(const char *format, va_list args)
{
-#ifdef __GLIBC__
- if (verbose() && env.worker_id == -1) {
- /* nothing to do, output to stdout by default */
- return;
- }
-
- if (stdout == env.stdout_saved)
- return;
-
- stdio_restore_cleanup();
+ pthread_mutex_lock(&stdout_lock);
+ vfprintf(stdout, format, args);
+ pthread_mutex_unlock(&stdout_lock);
- stdout = env.stdout_saved;
- stderr = env.stderr_saved;
-#endif
+ return 0;
}
/* Adapted from perf/util/string.c */
@@ -474,8 +477,6 @@ static void dump_test_log(const struct prog_test_def *test,
print_test_result(test, test_state);
}
-static void stdio_restore(void);
-
/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
* it after each test/sub-test.
*/
@@ -490,13 +491,11 @@ static void reset_affinity(void)
err = sched_setaffinity(0, sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
- stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -514,7 +513,6 @@ static void save_netns(void)
static void restore_netns(void)
{
if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
- stdio_restore();
perror("setns(CLONE_NEWNS)");
exit(EXIT_ERR_SETUP_INFRA);
}
@@ -541,7 +539,8 @@ void test__end_subtest(void)
test_result(subtest_state->error_cnt,
subtest_state->skipped));
- stdio_restore_cleanup();
+ stdio_restore();
+
env.subtest_state = NULL;
}
@@ -1270,8 +1269,10 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
- if (env.stdout_saved)
- stdio_restore();
+ fflush(stdout);
+ stdout = env.stdout_saved;
+ stderr = env.stderr_saved;
+
if (env.test) {
env.test_state->error_cnt++;
dump_test_log(env.test, env.test_state, true, false, NULL);
@@ -1282,6 +1283,21 @@ void crash_handler(int signum)
backtrace_symbols_fd(bt, sz, STDERR_FILENO);
}
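+
+/*
+ * Print @len bytes of @buf in hex, 16 bytes per line with an extra tab
+ * after every 8, each line starting with @prefix; used by ASSERT_MEMEQ()
+ * to show the compared buffers.
+ */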
+void hexdump(const char *prefix, const void *buf, size_t len)
+{
+ for (int i = 0; i < len; i++) {
+ if (!(i % 16)) {
+ if (i)
+ fprintf(stdout, "\n");
+ fprintf(stdout, "%s", prefix);
+ }
+ if (i && !(i % 8) && (i % 16))
+ fprintf(stdout, "\t");
+ fprintf(stdout, "%02X ", ((uint8_t *)(buf))[i]);
+ }
+ fprintf(stdout, "\n");
+}
+
static void sigint_handler(int signum)
{
int i;
@@ -1350,10 +1366,19 @@ static int recv_message(int sock, struct msg *msg)
return ret;
}
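+
+/*
+ * Tests whose name starts with "ns_" are given a fresh network namespace
+ * for the duration of the test (see run_one_test()).
+ */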
+static bool ns_is_needed(const char *test_name)
+{
+ if (strlen(test_name) < 3)
+ return false;
+
+ return !strncmp(test_name, "ns_", 3);
+}
+
static void run_one_test(int test_num)
{
struct prog_test_def *test = &prog_test_defs[test_num];
struct test_state *state = &test_states[test_num];
+ struct netns_obj *ns = NULL;
env.test = test;
env.test_state = state;
@@ -1361,10 +1386,13 @@ static void run_one_test(int test_num)
stdio_hijack(&state->log_buf, &state->log_cnt);
watchdog_start();
+ if (ns_is_needed(test->test_name))
+ ns = netns_new(test->test_name, true);
if (test->run_test)
test->run_test();
else if (test->run_serial_test)
test->run_serial_test();
+ netns_free(ns);
watchdog_stop();
/* ensure last sub-test is finalized properly */
@@ -1373,6 +1401,8 @@ static void run_one_test(int test_num)
state->tested = true;
+ stdio_restore();
+
if (verbose() && env.worker_id == -1)
print_test_result(test, state);
@@ -1381,7 +1411,6 @@ static void run_one_test(int test_num)
if (test->need_cgroup_cleanup)
cleanup_cgroup_environment();
- stdio_restore();
free(stop_libbpf_log_capture());
dump_test_log(test, state, false, false, NULL);
@@ -1901,6 +1930,13 @@ static void free_test_states(void)
}
}
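+
+/*
+ * Add an "asymmetric" key carrying @key_data to the session keyring; used
+ * to register the selftests' verification certificate at startup.
+ */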
+static __u32 register_session_key(const char *key_data, size_t key_data_size)
+{
+ return syscall(__NR_add_key, "asymmetric", "libbpf_session_key",
+ (const void *)key_data, key_data_size,
+ KEY_SPEC_SESSION_KEYRING);
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -1916,6 +1952,9 @@ int main(int argc, char **argv)
sigaction(SIGSEGV, &sigact, NULL);
+ env.stdout_saved = stdout;
+ env.stderr_saved = stderr;
+
env.secs_till_notify = 10;
env.secs_till_kill = 120;
err = argp_parse(&argp, argc, argv, 0, NULL, &env);
@@ -1931,6 +1970,12 @@ int main(int argc, char **argv)
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
+ err = register_session_key((const char *)test_progs_verification_cert,
+ test_progs_verification_cert_len);
+ if (err < 0)
+ return err;
+
+ traffic_monitor_set_print(traffic_monitor_print_fn);
srand(time(NULL));
@@ -1942,9 +1987,6 @@ int main(int argc, char **argv)
return -1;
}
- env.stdout_saved = stdout;
- env.stderr_saved = stderr;
-
env.has_testmod = true;
if (!env.list_test_names) {
/* ensure previous instance of the module is unloaded */
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 74de33ae37e5..eebfc18cdcd2 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -7,6 +7,7 @@
#include <errno.h>
#include <string.h>
#include <assert.h>
+#include <regex.h>
#include <stdlib.h>
#include <stdarg.h>
#include <time.h>
@@ -185,6 +186,7 @@ void test__end_subtest(void);
void test__skip(void);
void test__fail(void);
int test__join_cgroup(const char *path);
+void hexdump(const char *prefix, const void *buf, size_t len);
#define PRINT_FAIL(format...) \
({ \
@@ -344,6 +346,20 @@ int test__join_cgroup(const char *path);
___ok; \
})
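+
+/* Assert that @len bytes at @actual and @expected match; both buffers are dumped as hex. */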
+#define ASSERT_MEMEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const void *__act = actual; \
+ const void *__exp = expected; \
+ int __len = len; \
+ bool ___ok = memcmp(__act, __exp, __len) == 0; \
+ CHECK(!___ok, (name), "unexpected memory mismatch\n"); \
+ fprintf(stdout, "actual:\n"); \
+ hexdump("\t", __act, __len); \
+ fprintf(stdout, "expected:\n"); \
+ hexdump("\t", __exp, __len); \
+ ___ok; \
+})
+
#define ASSERT_OK(res, name) ({ \
static int duration = 0; \
long long ___res = (res); \
@@ -412,6 +428,14 @@ int test__join_cgroup(const char *path);
goto goto_label; \
})
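+
+/* Run a shell command built from @fmt that is expected to fail; jump to @goto_label if it succeeds. */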
+#define SYS_FAIL(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_NEQ(0, system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
#define SYS_NOFAIL(fmt, ...) \
@@ -437,6 +461,34 @@ static inline void *u64_to_ptr(__u64 ptr)
return (void *) (unsigned long) ptr;
}
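+
+/* Return the id the kernel assigned to the BPF program behind @fd (0 on error). */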
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
@@ -495,4 +547,20 @@ extern void test_loader_fini(struct test_loader *tester);
test_loader_fini(&tester); \
})
+struct expect_msg {
+ const char *substr; /* substring match */
+ regex_t regex;
+ bool is_regex;
+ bool on_next_line;
+ bool negative;
+};
+
+struct expected_msgs {
+ struct expect_msg *patterns;
+ size_t cnt;
+};
+
+void validate_msgs(const char *log_buf, struct expected_msgs *msgs,
+ void (*emit_fn)(const char *buf, bool force));
+
#endif /* __TEST_PROGS_H */
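As a usage illustration (assuming test_progs.h is included and the usual prog_tests harness), the new ASSERT_MEMEQ and SYS_FAIL helpers combine as in the sketch below; the test name and shell command are hypothetical.

/* Hedged usage sketch for the new helpers; the test and command are made up. */
void test_memeq_sketch(void)
{
	__u8 got[4] = {1, 2, 3, 4};
	__u8 want[4] = {1, 2, 3, 4};

	/* Compares the buffers and dumps both as hex for inspection. */
	ASSERT_MEMEQ(got, want, sizeof(got), "payload");

	/* SYS_FAIL expects the shell command to exit non-zero. */
	SYS_FAIL(cleanup, "ip link show nonexistent0%s", ALL_TO_DEV_NULL);
cleanup:
	return;
}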
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index e5c7ecbe57e3..76568db7a664 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1372,7 +1372,7 @@ run:
} else
fprintf(stderr, "unknown test\n");
out:
- /* Detatch and zero all the maps */
+ /* Detach and zero all the maps */
bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS);
for (i = 0; i < ARRAY_SIZE(links); i++) {
@@ -1579,8 +1579,12 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
{
- txmsg_redir = 1;
opt->tx_wait_mem = true;
+ txmsg_redir = 1;
+ test_send_large(opt, cgrp);
+
+ txmsg_redir = 1;
+ txmsg_apply = 4097;
test_send_large(opt, cgrp);
opt->tx_wait_mem = false;
}
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 5546b05a0486..f1300047c1e0 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -116,7 +116,7 @@ static void tag_from_alg(int insns, uint8_t *tag, uint32_t len)
static const struct sockaddr_alg alg = {
.salg_family = AF_ALG,
.salg_type = "hash",
- .salg_name = "sha1",
+ .salg_name = "sha256",
};
int fd_base, fd_alg, ret;
ssize_t size;
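The hunk above switches the AF_ALG digest used for tag computation from SHA-1 to SHA-256. For reference, a self-contained sketch of that AF_ALG hash pattern (bind a "hash"/"sha256" socket, send the data, read back the 32 digest bytes) could look like the following; error handling is trimmed.

/* Hedged sketch of the AF_ALG hash pattern used by tag_from_alg(). */
#include <unistd.h>
#include <sys/socket.h>
#include <linux/if_alg.h>

static int sha256_digest(const void *data, size_t len, unsigned char out[32])
{
	struct sockaddr_alg sa = {
		.salg_family = AF_ALG,
		.salg_type   = "hash",
		.salg_name   = "sha256",
	};
	int tfm, op, ret = -1;

	tfm = socket(AF_ALG, SOCK_SEQPACKET, 0);
	if (tfm < 0)
		return -1;
	if (bind(tfm, (struct sockaddr *)&sa, sizeof(sa)) == 0) {
		op = accept(tfm, NULL, 0);
		if (op >= 0 &&
		    send(op, data, len, 0) == (ssize_t)len &&
		    read(op, out, 32) == 32)
			ret = 0;
		if (op >= 0)
			close(op);
	}
	close(tfm);
	return ret;
}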
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
deleted file mode 100755
index 76f0bd17061f..000000000000
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ /dev/null
@@ -1,100 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test installs a TC bpf program that throttles a TCP flow
-# with dst port = 9000 down to 5MBps. Then it measures actual
-# throughput of the flow.
-
-BPF_FILE="test_tc_edt.bpf.o"
-if [[ $EUID -ne 0 ]]; then
- echo "This script must be run as root"
- echo "FAIL"
- exit 1
-fi
-
-# check that nc, dd, and timeout are present
-command -v nc >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v dd >/dev/null 2>&1 || \
- { echo >&2 "nc is not available"; exit 1; }
-command -v timeout >/dev/null 2>&1 || \
- { echo >&2 "timeout is not available"; exit 1; }
-
-readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
-readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
-
-readonly IP_SRC="172.16.1.100"
-readonly IP_DST="172.16.2.100"
-
-cleanup()
-{
- ip netns del ${NS_SRC}
- ip netns del ${NS_DST}
-}
-
-trap cleanup EXIT
-
-set -e # exit on error
-
-ip netns add "${NS_SRC}"
-ip netns add "${NS_DST}"
-ip link add veth_src type veth peer name veth_dst
-ip link set veth_src netns ${NS_SRC}
-ip link set veth_dst netns ${NS_DST}
-
-ip -netns ${NS_SRC} addr add ${IP_SRC}/24 dev veth_src
-ip -netns ${NS_DST} addr add ${IP_DST}/24 dev veth_dst
-
-ip -netns ${NS_SRC} link set dev veth_src up
-ip -netns ${NS_DST} link set dev veth_dst up
-
-ip -netns ${NS_SRC} route add ${IP_DST}/32 dev veth_src
-ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
-
-# set up TC on TX
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
-ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
-ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
- bpf da obj ${BPF_FILE} sec cls_test
-
-
-# start the listener
-ip netns exec ${NS_DST} bash -c \
- "nc -4 -l -p 9000 >/dev/null &"
-declare -i NC_PID=$!
-sleep 1
-
-declare -ir TIMEOUT=20
-declare -ir EXPECTED_BPS=5000000
-
-# run the load, capture RX bytes on DST
-declare -ir RX_BYTES_START=$( ip netns exec ${NS_DST} \
- cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-set +e
-ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero \
- bs=1000 count=1000000 > /dev/tcp/${IP_DST}/9000 2>/dev/null"
-set -e
-
-declare -ir RX_BYTES_END=$( ip netns exec ${NS_DST} \
- cat /sys/class/net/veth_dst/statistics/rx_bytes )
-
-declare -ir ACTUAL_BPS=$(( ($RX_BYTES_END - $RX_BYTES_START) / $TIMEOUT ))
-
-echo $TIMEOUT $ACTUAL_BPS $EXPECTED_BPS | \
- awk '{printf "elapsed: %d sec; bps difference: %.2f%%\n",
- $1, ($2-$3)*100.0/$3}'
-
-# Pass the test if the actual bps is within 1% of the expected bps.
-# The difference is usually about 0.1% on a 20-sec test, and ==> zero
-# the longer the test runs.
-declare -ir RES=$( echo $ACTUAL_BPS $EXPECTED_BPS | \
- awk 'function abs(x){return ((x < 0.0) ? -x : x)}
- {if (abs(($1-$2)*100.0/$2) > 1.0) { print "1" }
- else { print "0"} }' )
-if [ "${RES}" == "0" ] ; then
- echo "PASS"
-else
- echo "FAIL"
- exit 1
-fi
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
deleted file mode 100755
index 7989ec608454..000000000000
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ /dev/null
@@ -1,319 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# In-place tunneling
-
-BPF_FILE="test_tc_tunnel.bpf.o"
-# must match the port that the bpf program filters on
-readonly port=8000
-
-readonly ns_prefix="ns-$$-"
-readonly ns1="${ns_prefix}1"
-readonly ns2="${ns_prefix}2"
-
-readonly ns1_v4=192.168.1.1
-readonly ns2_v4=192.168.1.2
-readonly ns1_v6=fd::1
-readonly ns2_v6=fd::2
-
-# Must match port used by bpf program
-readonly udpport=5555
-# MPLSoverUDP
-readonly mplsudpport=6635
-readonly mplsproto=137
-
-readonly infile="$(mktemp)"
-readonly outfile="$(mktemp)"
-
-setup() {
- ip netns add "${ns1}"
- ip netns add "${ns2}"
-
- ip link add dev veth1 mtu 1500 netns "${ns1}" type veth \
- peer name veth2 mtu 1500 netns "${ns2}"
-
- ip netns exec "${ns1}" ethtool -K veth1 tso off
-
- ip -netns "${ns1}" link set veth1 up
- ip -netns "${ns2}" link set veth2 up
-
- ip -netns "${ns1}" -4 addr add "${ns1_v4}/24" dev veth1
- ip -netns "${ns2}" -4 addr add "${ns2_v4}/24" dev veth2
- ip -netns "${ns1}" -6 addr add "${ns1_v6}/64" dev veth1 nodad
- ip -netns "${ns2}" -6 addr add "${ns2_v6}/64" dev veth2 nodad
-
- # clamp route to reserve room for tunnel headers
- ip -netns "${ns1}" -4 route flush table main
- ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
-
- sleep 1
-
- dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-}
-
-cleanup() {
- ip netns del "${ns2}"
- ip netns del "${ns1}"
-
- if [[ -f "${outfile}" ]]; then
- rm "${outfile}"
- fi
- if [[ -f "${infile}" ]]; then
- rm "${infile}"
- fi
-
- if [[ -n $server_pid ]]; then
- kill $server_pid 2> /dev/null
- fi
-}
-
-server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
- server_pid=$!
-}
-
-client_connect() {
- ip netns exec "${ns1}" timeout 2 nc "${netcat_opt}" -w 1 "${addr2}" "${port}" < "${infile}"
- echo $?
-}
-
-verify_data() {
- wait "${server_pid}"
- server_pid=
- # sha1sum returns two fields [sha1] [filepath]
- # convert to bash array and access first elem
- insum=($(sha1sum ${infile}))
- outsum=($(sha1sum ${outfile}))
- if [[ "${insum[0]}" != "${outsum[0]}" ]]; then
- echo "data mismatch"
- exit 1
- fi
-}
-
-wait_for_port() {
- for i in $(seq 20); do
- if ip netns exec "${ns2}" ss ${2:--4}OHntl | grep -q "$1"; then
- return 0
- fi
- sleep 0.1
- done
- return 1
-}
-
-set -e
-
-# no arguments: automated test, run all
-if [[ "$#" -eq "0" ]]; then
- echo "ipip"
- $0 ipv4 ipip none 100
-
- echo "ipip6"
- $0 ipv4 ipip6 none 100
-
- echo "ip6ip6"
- $0 ipv6 ip6tnl none 100
-
- echo "sit"
- $0 ipv6 sit none 100
-
- echo "ip4 vxlan"
- $0 ipv4 vxlan eth 2000
-
- echo "ip6 vxlan"
- $0 ipv6 ip6vxlan eth 2000
-
- for mac in none mpls eth ; do
- echo "ip gre $mac"
- $0 ipv4 gre $mac 100
-
- echo "ip6 gre $mac"
- $0 ipv6 ip6gre $mac 100
-
- echo "ip gre $mac gso"
- $0 ipv4 gre $mac 2000
-
- echo "ip6 gre $mac gso"
- $0 ipv6 ip6gre $mac 2000
-
- echo "ip udp $mac"
- $0 ipv4 udp $mac 100
-
- echo "ip6 udp $mac"
- $0 ipv6 ip6udp $mac 100
-
- echo "ip udp $mac gso"
- $0 ipv4 udp $mac 2000
-
- echo "ip6 udp $mac gso"
- $0 ipv6 ip6udp $mac 2000
- done
-
- echo "OK. All tests passed"
- exit 0
-fi
-
-if [[ "$#" -ne "4" ]]; then
- echo "Usage: $0"
- echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
- exit 1
-fi
-
-case "$1" in
-"ipv4")
- readonly addr1="${ns1_v4}"
- readonly addr2="${ns2_v4}"
- readonly ipproto=4
- readonly netcat_opt=-${ipproto}
- readonly foumod=fou
- readonly foutype=ipip
- readonly fouproto=4
- readonly fouproto_mpls=${mplsproto}
- readonly gretaptype=gretap
- ;;
-"ipv6")
- readonly addr1="${ns1_v6}"
- readonly addr2="${ns2_v6}"
- readonly ipproto=6
- readonly netcat_opt=-${ipproto}
- readonly foumod=fou6
- readonly foutype=ip6tnl
- readonly fouproto="41 -6"
- readonly fouproto_mpls="${mplsproto} -6"
- readonly gretaptype=ip6gretap
- ;;
-*)
- echo "unknown arg: $1"
- exit 1
- ;;
-esac
-
-readonly tuntype=$2
-readonly mac=$3
-readonly datalen=$4
-
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
-
-trap cleanup EXIT
-
-setup
-
-# basic communication works
-echo "test basic connectivity"
-server_listen
-wait_for_port ${port} ${netcat_opt}
-client_connect
-verify_data
-
-# clientside, insert bpf program to encap all TCP to port ${port}
-# client can no longer connect
-ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
-ip netns exec "${ns1}" tc filter add dev veth1 egress \
- bpf direct-action object-file ${BPF_FILE} \
- section "encap_${tuntype}_${mac}"
-echo "test bpf encap without decap (expect failure)"
-server_listen
-wait_for_port ${port} ${netcat_opt}
-! client_connect
-
-if [[ "$tuntype" =~ "udp" ]]; then
- # Set up fou tunnel.
- ttype="${foutype}"
- targs="encap fou encap-sport auto encap-dport $udpport"
- # fou may be a module; allow this to fail.
- modprobe "${foumod}" ||true
- if [[ "$mac" == "mpls" ]]; then
- dport=${mplsudpport}
- dproto=${fouproto_mpls}
- tmode="mode any ttl 255"
- else
- dport=${udpport}
- dproto=${fouproto}
- fi
- ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
- targs="encap fou encap-sport auto encap-dport $dport"
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
- ttype=$gretaptype
-elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
- ttype="vxlan"
- targs="id 1 dstport 8472 udp6zerocsumrx"
-elif [[ "$tuntype" == "ipip6" ]]; then
- ttype="ip6tnl"
- targs=""
-else
- ttype=$tuntype
- targs=""
-fi
-
-# tunnel address family differs from inner for SIT
-if [[ "${tuntype}" == "sit" ]]; then
- link_addr1="${ns1_v4}"
- link_addr2="${ns2_v4}"
-elif [[ "${tuntype}" == "ipip6" ]]; then
- link_addr1="${ns1_v6}"
- link_addr2="${ns2_v6}"
-else
- link_addr1="${addr1}"
- link_addr2="${addr2}"
-fi
-
-# serverside, insert decap module
-# server is still running
-# client can connect again
-ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
- ${tmode} remote "${link_addr1}" local "${link_addr2}" $targs
-
-expect_tun_fail=0
-
-if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
- # No support for MPLS IPv6 fou tunnel; expect failure.
- expect_tun_fail=1
-elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
- # No support for TEB fou tunnel; expect failure.
- expect_tun_fail=1
-elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
- # Share ethernet address between tunnel/veth2 so L2 decap works.
- ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
- awk '/ether/ { print $2 }')
- ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
-elif [[ "$mac" == "mpls" ]]; then
- modprobe mpls_iptunnel ||true
- modprobe mpls_gso ||true
- ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
- ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
- ip netns exec "${ns2}" ip link set lo up
- ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
- ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
-fi
-
-# Because packets are decapped by the tunnel they arrive on testtun0 from
-# the IP stack perspective. Ensure reverse path filtering is disabled
-# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
-# expected veth2 (veth2 is where 192.168.1.2 is configured).
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
-# rp needs to be disabled for both all and testtun0 as the rp value is
-# selected as the max of the "all" and device-specific values.
-ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
-ip netns exec "${ns2}" ip link set dev testtun0 up
-if [[ "$expect_tun_fail" == 1 ]]; then
- # This tunnel mode is not supported, so we expect failure.
- echo "test bpf encap with tunnel device decap (expect failure)"
- ! client_connect
-else
- echo "test bpf encap with tunnel device decap"
- client_connect
- verify_data
- server_listen
-fi
-
-# serverside, use BPF for decap
-ip netns exec "${ns2}" ip link del dev testtun0
-ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
-ip netns exec "${ns2}" tc filter add dev veth2 ingress \
- bpf direct-action object-file ${BPF_FILE} section decap
-echo "test bpf encap with bpf decap"
-client_connect
-verify_data
-
-echo OK
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 595194453ff8..35b4893ccdf8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -15,20 +15,18 @@
#include <bpf/libbpf.h>
#include <sys/ioctl.h>
#include <linux/rtnetlink.h>
-#include <signal.h>
#include <linux/perf_event.h>
-#include <linux/err.h>
-#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "test_tcpnotify.h"
-#include "trace_helpers.h"
#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
pthread_t tid;
+static bool exit_thread;
+
int rx_callbacks;
static void dummyfn(void *ctx, int cpu, void *data, __u32 size)
@@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb)
{
int err;
- while (1) {
+ while (!exit_thread) {
err = perf_buffer__poll(pb, 100);
if (err < 0 && err != -EINTR) {
printf("failed perf_buffer__poll: %d\n", err);
@@ -78,15 +76,10 @@ int main(int argc, char **argv)
int error = EXIT_FAILURE;
struct bpf_object *obj;
char test_script[80];
- cpu_set_t cpuset;
__u32 key = 0;
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
-
cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
@@ -151,6 +144,13 @@ int main(int argc, char **argv)
sleep(10);
+ exit_thread = true;
+ int ret = pthread_join(tid, NULL);
+ if (ret) {
+ printf("FAILED: pthread_join\n");
+ goto err;
+ }
+
if (verify_result(&g)) {
printf("FAILED: Wrong stats Expected %d calls, got %d\n",
g.ncalls, rx_callbacks);
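The change above replaces the CPU-pinned, never-ending poller with a stop flag plus pthread_join(). A generic sketch of that shutdown pattern, independent of perf_buffer (the sleep stands in for perf_buffer__poll), is shown below; unlike the patch, the flag here is marked volatile for the standalone case.

/* Hedged sketch of the stop-flag + pthread_join() shutdown pattern. */
#include <pthread.h>
#include <stdbool.h>
#include <unistd.h>

static volatile bool stop;

static void *poller(void *arg)
{
	while (!stop)
		usleep(100 * 1000);	/* stands in for perf_buffer__poll(pb, 100) */
	return NULL;
}

int main(void)
{
	pthread_t tid;

	if (pthread_create(&tid, NULL, poller, NULL))
		return 1;
	sleep(1);			/* test body would run here */
	stop = true;			/* ask the poller to finish ... */
	return pthread_join(tid, NULL);	/* ... then reap the thread */
}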
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
deleted file mode 100755
index d9661b9988ba..000000000000
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ /dev/null
@@ -1,645 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# End-to-end eBPF tunnel test suite
-# The script tests BPF network tunnel implementation.
-#
-# Topology:
-# ---------
-# root namespace | at_ns0 namespace
-# |
-# ----------- | -----------
-# | tnl dev | | | tnl dev | (overlay network)
-# ----------- | -----------
-# metadata-mode | native-mode
-# with bpf |
-# |
-# ---------- | ----------
-# | veth1 | --------- | veth0 | (underlay network)
-# ---------- peer ----------
-#
-#
-# Device Configuration
-# --------------------
-# Root namespace with metadata-mode tunnel + BPF
-# Device names and addresses:
-# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
-#
-# Namespace at_ns0 with native tunnel
-# Device names and addresses:
-# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
-#
-#
-# End-to-end ping packet flow
-# ---------------------------
-# Most of the tests start by namespace creation, device configuration,
-# then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
-# from root namespace, the following operations happen:
-# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
-# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
-# with remote_ip=172.16.1.100 and others.
-# 3) Outer tunnel header is prepended and route the packet to veth1's egress
-# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
-# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
-# 6) Forward the packet to the overlay tnl dev
-
-BPF_FILE="test_tunnel_kern.bpf.o"
-BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
-PING_ARG="-c 3 -w 10 -q"
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-config_device()
-{
- ip netns add at_ns0
- ip link add veth0 type veth peer name veth1
- ip link set veth0 netns at_ns0
- ip netns exec at_ns0 ip addr add 172.16.1.100/24 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip link set dev veth1 up mtu 1500
- ip addr add dev veth1 172.16.1.200/24
-}
-
-add_gre_tunnel()
-{
- tun_key=
- if [ -n "$1" ]; then
- tun_key="key $1"
- fi
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq $tun_key \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE $tun_key external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6gretap_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq flowlabel 0xbcdef key 2 \
- local ::11 remote ::22
-
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS fc80::100/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV fc80::200/24
- ip link set dev $DEV up
-}
-
-add_erspan_tunnel()
-{
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local 172.16.1.100 remote 172.16.1.200 \
- erspan_ver 2 erspan_dir egress erspan_hwid 3
- fi
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6erspan_tunnel()
-{
-
- # assign ipv6 address
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- if [ "$1" == "v1" ]; then
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 1 erspan 123
- else
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
- local ::11 remote ::22 \
- erspan_ver 2 erspan_dir egress erspan_hwid 7
- fi
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_geneve_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 6081 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE dstport 6081 external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6geneve_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 \
- remote ::22 # geneve has no local option
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
-add_ipip_tunnel()
-{
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local 172.16.1.100 remote 172.16.1.200
- ip netns exec at_ns0 ip link set dev $DEV_NS up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip link set dev $DEV up
- ip addr add dev $DEV 10.1.1.200/24
-}
-
-add_ip6tnl_tunnel()
-{
- ip netns exec at_ns0 ip addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- ip addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external
- ip addr add dev $DEV 10.1.1.200/24
- ip addr add dev $DEV 1::22/96
- ip link set dev $DEV up
-}
-
-test_gre()
-{
- TYPE=gretap
- DEV_NS=gretap00
- DEV=gretap11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel 2
- attach_bpf $DEV gre_set_tunnel gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_gre_no_tunnel_key()
-{
- TYPE=gre
- DEV_NS=gre00
- DEV=gre11
- ret=0
-
- check $TYPE
- config_device
- add_gre_tunnel
- attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gre()
-{
- TYPE=ip6gre
- DEV_NS=ip6gre00
- DEV=ip6gre11
- ret=0
-
- check $TYPE
- config_device
- # reuse the ip6gretap function
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6gretap()
-{
- TYPE=ip6gretap
- DEV_NS=ip6gretap00
- DEV=ip6gretap11
- ret=0
-
- check $TYPE
- config_device
- add_ip6gretap_tunnel
- attach_bpf $DEV ip6gretap_set_tunnel ip6gretap_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # overlay: ipv4 over ipv6
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- ping $PING_ARG 10.1.1.100
- check_err $?
- # overlay: ipv6 over ipv6
- ip netns exec at_ns0 ping6 $PING_ARG fc80::200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_erspan()
-{
- TYPE=erspan
- DEV_NS=erspan00
- DEV=erspan11
- ret=0
-
- check $TYPE
- config_device
- add_erspan_tunnel $1
- attach_bpf $DEV erspan_set_tunnel erspan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6erspan()
-{
- TYPE=ip6erspan
- DEV_NS=ip6erspan00
- DEV=ip6erspan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6erspan_tunnel $1
- attach_bpf $DEV ip4ip6erspan_set_tunnel ip4ip6erspan_get_tunnel
- ping6 $PING_ARG ::11
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_geneve()
-{
- TYPE=geneve
- DEV_NS=geneve00
- DEV=geneve11
- ret=0
-
- check $TYPE
- config_device
- add_geneve_tunnel
- attach_bpf $DEV geneve_set_tunnel geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6geneve()
-{
- TYPE=geneve
- DEV_NS=ip6geneve00
- DEV=ip6geneve11
- ret=0
-
- check $TYPE
- config_device
- add_ip6geneve_tunnel
- attach_bpf $DEV ip6geneve_set_tunnel ip6geneve_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-test_ipip()
-{
- TYPE=ipip
- DEV_NS=ipip00
- DEV=ipip11
- ret=0
-
- check $TYPE
- config_device
- add_ipip_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip_set_tunnel ipip_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ipip6()
-{
- TYPE=ip6tnl
- DEV_NS=ipip6tnl00
- DEV=ipip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6ip6()
-{
- TYPE=ip6tnl
- DEV_NS=ip6ip6tnl00
- DEV=ip6ip6tnl11
- ret=0
-
- check $TYPE
- config_device
- add_ip6tnl_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip6 over ip6
- ping6 $PING_ARG 1::11
- check_err $?
- ip netns exec at_ns0 ping6 $PING_ARG 1::22
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
-attach_bpf()
-{
- DEV=$1
- SET=$2
- GET=$3
- mkdir -p ${BPF_PIN_TUNNEL_DIR}
- bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/
- tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
- tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
-}
-
-cleanup()
-{
- rm -rf ${BPF_PIN_TUNNEL_DIR}
-
- ip netns delete at_ns0 2> /dev/null
- ip link del veth1 2> /dev/null
- ip link del ipip11 2> /dev/null
- ip link del ipip6tnl11 2> /dev/null
- ip link del ip6ip6tnl11 2> /dev/null
- ip link del gretap11 2> /dev/null
- ip link del gre11 2> /dev/null
- ip link del ip6gre11 2> /dev/null
- ip link del ip6gretap11 2> /dev/null
- ip link del geneve11 2> /dev/null
- ip link del ip6geneve11 2> /dev/null
- ip link del erspan11 2> /dev/null
- ip link del ip6erspan11 2> /dev/null
-}
-
-cleanup_exit()
-{
- echo "CATCH SIGKILL or SIGINT, cleanup and exit"
- cleanup
- exit 0
-}
-
-check()
-{
- ip link help 2>&1 | grep -q "\s$1\s"
- if [ $? -ne 0 ];then
- echo "SKIP $1: iproute2 not support"
- cleanup
- return 1
- fi
-}
-
-enable_debug()
-{
- echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
-}
-
-check_err()
-{
- if [ $ret -eq 0 ]; then
- ret=$1
- fi
-}
-
-bpf_tunnel_test()
-{
- local errors=0
-
- echo "Testing GRE tunnel..."
- test_gre
- errors=$(( $errors + $? ))
-
- echo "Testing GRE tunnel (without tunnel keys)..."
- test_gre_no_tunnel_key
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRE tunnel..."
- test_ip6gre
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GRETAP tunnel..."
- test_ip6gretap
- errors=$(( $errors + $? ))
-
- echo "Testing ERSPAN tunnel..."
- test_erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing IP6ERSPAN tunnel..."
- test_ip6erspan v2
- errors=$(( $errors + $? ))
-
- echo "Testing GENEVE tunnel..."
- test_geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IP6GENEVE tunnel..."
- test_ip6geneve
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP tunnel..."
- test_ipip
- errors=$(( $errors + $? ))
-
- echo "Testing IPIP6 tunnel..."
- test_ipip6
- errors=$(( $errors + $? ))
-
- echo "Testing IP6IP6 tunnel..."
- test_ip6ip6
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup 0 3 6
-trap cleanup_exit 2 9
-
-cleanup
-bpf_tunnel_test
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 447b68509d76..27db34ecf3f5 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -734,7 +734,7 @@ static __u32 btf_raw_types[] = {
BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static char bpf_vlog[UINT_MAX >> 8];
+static char bpf_vlog[UINT_MAX >> 5];
static int load_btf_spec(__u32 *types, int types_len,
const char *strings, int strings_len)
@@ -1559,10 +1559,10 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->errstr_unpriv : test->errstr;
opts.expected_attach_type = test->expected_attach_type;
- if (verbose)
- opts.log_level = verif_log_level | 4; /* force stats */
- else if (expected_ret == VERBOSE_ACCEPT)
+ if (expected_ret == VERBOSE_ACCEPT)
opts.log_level = 2;
+ else if (verbose)
+ opts.log_level = verif_log_level | 4; /* force stats */
else
opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
opts.prog_flags = pflags;
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
deleted file mode 100755
index 2740322c1878..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-BPF_FILE="test_xdp_meta.bpf.o"
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_meta [PASS]";
- else
- echo "selftests: test_xdp_meta [FAILED]";
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-ip link set dev lo xdp off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdp support"
- exit $KSFT_SKIP
-fi
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
-
-ip netns exec ${NS1} tc qdisc add dev veth1 clsact
-ip netns exec ${NS2} tc qdisc add dev veth2 clsact
-
-ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t
-ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t
-
-ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x
-ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-
-ip netns exec ${NS1} ping -c 1 10.1.1.22
-ip netns exec ${NS2} ping -c 1 10.1.1.11
-
-exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
deleted file mode 100755
index 0746a4fde9d3..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ /dev/null
@@ -1,79 +0,0 @@
-#!/bin/bash
-# Create 2 namespaces with two veth peers, and
-# forward packets in-between using generic XDP
-#
-# NS1(veth11) NS2(veth22)
-# | |
-# | |
-# (veth1, ------ (veth2,
-# id:111) id:222)
-# | xdp forwarding |
-# ------------------
-
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-ret=0
-
-setup()
-{
-
- local xdpmode=$1
-
- ip netns add ${NS1}
- ip netns add ${NS2}
-
- ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
- ip link add veth2 index 222 type veth peer name veth22 netns ${NS2}
-
- ip link set veth1 up
- ip link set veth2 up
- ip -n ${NS1} link set dev veth11 up
- ip -n ${NS2} link set dev veth22 up
-
- ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
- ip -n ${NS2} addr add 10.1.1.22/24 dev veth22
-}
-
-cleanup()
-{
- ip link del veth1 2> /dev/null
- ip link del veth2 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-test_xdp_redirect()
-{
- local xdpmode=$1
-
- setup
-
- ip link set dev veth1 $xdpmode off &> /dev/null
- if [ $? -ne 0 ];then
- echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
- return 0
- fi
-
- ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
- ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
- ip link set dev veth1 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_222 &> /dev/null
- ip link set dev veth2 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_111 &> /dev/null
-
- if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null &&
- ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then
- echo "selftests: test_xdp_redirect $xdpmode [PASS]";
- else
- ret=1
- echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
- fi
-
- cleanup
-}
-
-set -e
-trap cleanup 2 3 6 9
-
-test_xdp_redirect xdpgeneric
-test_xdp_redirect xdpdrv
-
-exit $ret
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
deleted file mode 100755
index 4c3c3fdd2d73..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ /dev/null
@@ -1,214 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test topology:
-# - - - - - - - - - - - - - - - - - - -
-# | veth1 veth2 veth3 | ns0
-# - -| - - - - - - | - - - - - - | - -
-# --------- --------- ---------
-# | veth0 | | veth0 | | veth0 |
-# --------- --------- ---------
-# ns1 ns2 ns3
-#
-# Test modules:
-# XDP modes: generic, native, native + egress_prog
-#
-# Test cases:
-# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
-# the redirects.
-# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
-# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
-# interface should not receive the redirects.
-# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
-# IPv6: Testing none flag, all the pkts should be redirected back
-# ping test: ns1 -> ns2 (block), echo requests will be redirect back
-# egress_prog:
-# all src mac should be egress interface's mac
-
-# netns numbers
-NUM=3
-IFACES=""
-DRV_MODE="xdpgeneric xdpdrv xdpegress"
-PASS=0
-FAIL=0
-LOG_DIR=$(mktemp -d)
-declare -a NS
-NS[0]="ns0-$(mktemp -u XXXXXX)"
-NS[1]="ns1-$(mktemp -u XXXXXX)"
-NS[2]="ns2-$(mktemp -u XXXXXX)"
-NS[3]="ns3-$(mktemp -u XXXXXX)"
-
-test_pass()
-{
- echo "Pass: $@"
- PASS=$((PASS + 1))
-}
-
-test_fail()
-{
- echo "fail: $@"
- FAIL=$((FAIL + 1))
-}
-
-clean_up()
-{
- for i in $(seq 0 $NUM); do
- ip netns del ${NS[$i]} 2> /dev/null
- done
-}
-
-# Kselftest framework requirement - SKIP code is 4.
-check_env()
-{
- ip link set dev lo xdpgeneric off &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 4
- fi
-
- which tcpdump &>/dev/null
- if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without tcpdump"
- exit 4
- fi
-}
-
-setup_ns()
-{
- local mode=$1
- IFACES=""
-
- if [ "$mode" = "xdpegress" ]; then
- mode="xdpdrv"
- fi
-
- ip netns add ${NS[0]}
- for i in $(seq $NUM); do
- ip netns add ${NS[$i]}
- ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
- ip -n ${NS[$i]} link set veth0 up
- ip -n ${NS[0]} link set veth$i up
-
- ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
- ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
- # Add a neigh entry for IPv4 ping test
- ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
- ip -n ${NS[$i]} link set veth0 $mode obj \
- xdp_dummy.bpf.o sec xdp &> /dev/null || \
- { test_fail "Unable to load dummy xdp" && exit 1; }
- IFACES="$IFACES veth$i"
- veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
- done
-}
-
-do_egress_tests()
-{
- local mode=$1
-
- # mac test
- ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
- sleep 0.5
- ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # mac check
- grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \
- test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
- grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \
- test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
-}
-
-do_ping_tests()
-{
- local mode=$1
-
- # ping6 test: echo request should be redirect back to itself, not others
- ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
-
- ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
- ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
- ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
- sleep 0.5
- # ARP test
- ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
- # IPv4 test
- ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
- # IPv6 test
- ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
- sleep 0.5
- pkill tcpdump
-
- # All netns should receive the redirect arp requests
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-1" || \
- test_fail "$mode arp(F_BROADCAST) ns1-1"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-2" || \
- test_fail "$mode arp(F_BROADCAST) ns1-2"
- [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \
- test_pass "$mode arp(F_BROADCAST) ns1-3" || \
- test_fail "$mode arp(F_BROADCAST) ns1-3"
-
- # ns1 should not receive the redirect echo request, others should
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
- [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
- test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
-
- # ns1 should receive the echo request, ns2 should not
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
- test_pass "$mode IPv6 (no flags) ns1-1" || \
- test_fail "$mode IPv6 (no flags) ns1-1"
- [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \
- test_pass "$mode IPv6 (no flags) ns1-2" || \
- test_fail "$mode IPv6 (no flags) ns1-2"
-}
-
-do_tests()
-{
- local mode=$1
- local drv_p
-
- case ${mode} in
- xdpdrv) drv_p="-N";;
- xdpegress) drv_p="-X";;
- xdpgeneric) drv_p="-S";;
- esac
-
- ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
- xdp_pid=$!
- sleep 1
- if ! ps -p $xdp_pid > /dev/null; then
- test_fail "$mode xdp_redirect_multi start failed"
- return 1
- fi
-
- if [ "$mode" = "xdpegress" ]; then
- do_egress_tests $mode
- else
- do_ping_tests $mode
- fi
-
- kill $xdp_pid
-}
-
-check_env
-
-trap clean_up EXIT
-
-for mode in ${DRV_MODE}; do
- setup_ns $mode
- do_tests $mode
- clean_up
-done
-rm -rf ${LOG_DIR}
-
-echo "Summary: PASS $PASS, FAIL $FAIL"
-[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
deleted file mode 100755
index fbcaa9f0120b..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ /dev/null
@@ -1,233 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-# Author: Jesper Dangaard Brouer <hawk@kernel.org>
-
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-# Allow wrapper scripts to name test
-if [ -z "$TESTNAME" ]; then
- TESTNAME=xdp_vlan
-fi
-
-# Default XDP mode
-XDP_MODE=xdpgeneric
-
-usage() {
- echo "Testing XDP + TC eBPF VLAN manipulations: $TESTNAME"
- echo ""
- echo "Usage: $0 [-vfh]"
- echo " -v | --verbose : Verbose"
- echo " --flush : Flush before starting (e.g. after --interactive)"
- echo " --interactive : Keep netns setup running after test-run"
- echo " --mode=XXX : Choose XDP mode (xdp | xdpgeneric | xdpdrv)"
- echo ""
-}
-
-valid_xdp_mode()
-{
- local mode=$1
-
- case "$mode" in
- xdpgeneric | xdpdrv | xdp)
- return 0
- ;;
- *)
- return 1
- esac
-}
-
-cleanup()
-{
- local status=$?
-
- if [ "$status" = "0" ]; then
- echo "selftests: $TESTNAME [PASS]";
- else
- echo "selftests: $TESTNAME [FAILED]";
- fi
-
- if [ -n "$INTERACTIVE" ]; then
- echo "Namespace setup still active explore with:"
- echo " ip netns exec ${NS1} bash"
- echo " ip netns exec ${NS2} bash"
- exit $status
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-# Using external program "getopt" to get --long-options
-OPTIONS=$(getopt -o hvfi: \
- --long verbose,flush,help,interactive,debug,mode: -- "$@")
-if (( $? != 0 )); then
- usage
- echo "selftests: $TESTNAME [FAILED] Error calling getopt, unknown option?"
- exit 2
-fi
-eval set -- "$OPTIONS"
-
-## --- Parse command line arguments / parameters ---
-while true; do
- case "$1" in
- -v | --verbose)
- export VERBOSE=yes
- shift
- ;;
- -i | --interactive | --debug )
- INTERACTIVE=yes
- shift
- ;;
- -f | --flush )
- cleanup
- shift
- ;;
- --mode )
- shift
- XDP_MODE=$1
- shift
- ;;
- -- )
- shift
- break
- ;;
- -h | --help )
- usage;
- echo "selftests: $TESTNAME [SKIP] usage help info requested"
- exit $KSFT_SKIP
- ;;
- * )
- shift
- break
- ;;
- esac
-done
-
-if [ "$EUID" -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] need root privileges"
- exit 1
-fi
-
-valid_xdp_mode $XDP_MODE
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [FAILED] unknown XDP mode ($XDP_MODE)"
- exit 1
-fi
-
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ]; then
- echo "selftests: $TESTNAME [SKIP] need ip xdp support"
- exit $KSFT_SKIP
-fi
-
-# Interactive mode likely require us to cleanup netns
-if [ -n "$INTERACTIVE" ]; then
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-fi
-
-# Exit on failure
-set -e
-
-# Some shell-tools dependencies
-which ip > /dev/null
-which tc > /dev/null
-which ethtool > /dev/null
-
-# Make rest of shell verbose, showing comments as doc/info
-if [ -n "$VERBOSE" ]; then
- set -v
-fi
-
-# Create two namespaces
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-# Run cleanup if failing or on kill
-trap cleanup 0 2 3 6 9
-
-# Create veth pair
-ip link add veth1 type veth peer name veth2
-
-# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-# NOTICE: XDP require VLAN header inside packet payload
-# - Thus, disable VLAN offloading driver features
-# - For veth REMEMBER TX side VLAN-offload
-#
-# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ${NS1} ethtool -K veth1 rxvlan off
-ip netns exec ${NS2} ethtool -K veth2 rxvlan off
-#
-# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ${NS2} ethtool -K veth2 txvlan off
-ip netns exec ${NS1} ethtool -K veth1 txvlan off
-
-export IPADDR1=100.64.41.1
-export IPADDR2=100.64.41.2
-
-# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ${NS1} ip link set veth1 up
-
-# In ns2/veth2 create VLAN device
-export VLAN=4011
-export DEVNS2=veth2
-ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ${NS2} ip link set $DEVNS2 up
-ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
-
-# Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ${NS1} ip link set lo up
-ip netns exec ${NS2} ip link set lo up
-
-# At this point, the hosts cannot reach each-other,
-# because ns2 are using VLAN tags on the packets.
-
-ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
-
-
-# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
-# ----------------------------------------------------------------------
-# In ns1: ingress use XDP to remove VLAN tags
-export DEVNS1=veth1
-export BPF_FILE=test_xdp_vlan.bpf.o
-
-# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
-export XDP_PROG=xdp_vlan_change
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# In ns1: egress use TC to add back VLAN tag 4011
-# (del cmd)
-# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
-#
-ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
-ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
- prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push
-
-# Now the namespaces can reach each-other, test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
-
-# Second test: Replace xdp prog, that fully remove vlan header
-#
-# Catch kernel bug for generic-XDP, that does didn't allow us to
-# remove a VLAN header, because skb->protocol still contain VLAN
-# ETH_P_8021Q indication, and this cause overwriting of our changes.
-#
-export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
-
-# Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
deleted file mode 100755
index c515326d6d59..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_generic.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test generic-XDP
-export TESTNAME=xdp_vlan_mode_generic
-./test_xdp_vlan.sh --mode=xdpgeneric
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh b/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
deleted file mode 100755
index 5cf7ce1f16c1..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_vlan_mode_native.sh
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Exit on failure
-set -e
-
-# Wrapper script to test native-XDP
-export TESTNAME=xdp_vlan_mode_native
-./test_xdp_vlan.sh --mode=xdpdrv
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 65aafe0003db..62db060298a4 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -241,4 +241,6 @@ done
if [ $failures -eq 0 ]; then
echo "All tests successful!"
+else
+ exit 1
fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 5e9f16683be5..16eb37e5bad6 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -399,7 +399,7 @@ int unload_module(const char *name, bool verbose)
return 0;
}
-int load_module(const char *path, bool verbose)
+static int __load_module(const char *path, const char *param_values, bool verbose)
{
int fd;
@@ -411,7 +411,7 @@ int load_module(const char *path, bool verbose)
fprintf(stdout, "Can't find %s kernel module: %d\n", path, -errno);
return -ENOENT;
}
- if (finit_module(fd, "", 0)) {
+ if (finit_module(fd, param_values, 0)) {
fprintf(stdout, "Failed to load %s into the kernel: %d\n", path, -errno);
close(fd);
return -EINVAL;
@@ -423,6 +423,16 @@ int load_module(const char *path, bool verbose)
return 0;
}
+int load_module_params(const char *path, const char *param_values, bool verbose)
+{
+ return __load_module(path, param_values, verbose);
+}
+
+int load_module(const char *path, bool verbose)
+{
+ return __load_module(path, "", verbose);
+}
+
int unload_bpf_testmod(bool verbose)
{
return unload_module("bpf_testmod", verbose);
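The new load_module_params() simply forwards a module-parameter string to finit_module(2). A hedged usage sketch follows; the parameter name is made up, since the module's actual parameters are not part of this hunk.

/* Hedged usage sketch: "some_param=1" is a hypothetical module parameter. */
#include <stdio.h>
#include <stdbool.h>
#include "testing_helpers.h"

int load_testmod_with_params(bool verbose)
{
	int err = load_module_params("bpf_testmod.ko", "some_param=1", verbose);

	if (err)
		fprintf(stderr, "loading bpf_testmod.ko failed: %d\n", err);
	return err;
}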
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 46d7f7089f63..eb20d3772218 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -39,6 +39,7 @@ int kern_sync_rcu(void);
int finit_module(int fd, const char *param_values, int flags);
int delete_module(const char *name, int flags);
int load_module(const char *path, bool verbose);
+int load_module_params(const char *path, const char *param_values, bool verbose);
int unload_module(const char *name, bool verbose);
static inline __u64 get_time_ns(void)
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 2d742fdac6b9..eeaab7013ca2 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -17,7 +17,9 @@
#include <linux/limits.h>
#include <libelf.h>
#include <gelf.h>
+#include "bpf/hashmap.h"
#include "bpf/libbpf_internal.h"
+#include "bpf_util.h"
#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
@@ -293,6 +295,10 @@ static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *st
return 0;
}
#else
+# ifndef PROCMAP_QUERY_VMA_EXECUTABLE
+# define PROCMAP_QUERY_VMA_EXECUTABLE 0x04
+# endif
+
static int procmap_query(int fd, const void *addr, __u32 query_flags, size_t *start, size_t *offset, int *flags)
{
return -EOPNOTSUPP;
@@ -515,3 +521,235 @@ void read_trace_pipe(void)
{
read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
}
+
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static const char * const trace_blacklist[] = {
+ "migrate_disable",
+ "migrate_enable",
+ "rcu_read_unlock_strict",
+ "preempt_count_add",
+ "preempt_count_sub",
+ "__rcu_read_lock",
+ "__rcu_read_unlock",
+ "bpf_get_numa_node_id",
+};
+
+static bool skip_entry(char *name)
+{
+ int i;
+
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+	 * to them. Filter out the current culprits: arch_cpu_idle,
+	 * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+
+ for (i = 0; i < ARRAY_SIZE(trace_blacklist); i++) {
+ if (!strcmp(name, trace_blacklist[i]))
+ return true;
+ }
+
+ return false;
+}
+
+/* Do comparison by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
+ struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ksyms)
+ return -EINVAL;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable to trace.
+	 * Filter out duplicates using hashmap__add(), which won't
+	 * add an entry that already exists.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = ksym_name;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err)
+ free(syms);
+ return err;
+}
+
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+	/* In my local setup the number of entries is 50k+, so initially
+	 * allocate space to hold 64k entries. If 64k is not enough, grow
+	 * the buffer by 1k entries at a time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt * sizeof(long));
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
+ }
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 2ce873c9f9aa..9437bdd4afa5 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -41,4 +41,7 @@ ssize_t get_rel_offset(uintptr_t addr);
int read_build_id(const char *path, char *build_id, size_t size);
+int bpf_get_ksyms(char ***symsp, size_t *cntp, bool kernel);
+int bpf_get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel);
+
#endif
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
index 220f6a963813..f997d7ec8fd0 100644
--- a/tools/testing/selftests/bpf/unpriv_helpers.c
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -1,15 +1,76 @@
// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
+#include <sys/utsname.h>
#include <unistd.h>
#include <fcntl.h>
+#include <zlib.h>
#include "unpriv_helpers.h"
-static bool get_mitigations_off(void)
+static gzFile open_config(void)
+{
+ struct utsname uts;
+ char buf[PATH_MAX];
+ gzFile config;
+
+ if (uname(&uts)) {
+ perror("uname");
+ goto config_gz;
+ }
+
+ snprintf(buf, sizeof(buf), "/boot/config-%s", uts.release);
+ config = gzopen(buf, "rb");
+ if (config)
+ return config;
+ fprintf(stderr, "gzopen %s: %s\n", buf, strerror(errno));
+
+config_gz:
+ config = gzopen("/proc/config.gz", "rb");
+ if (!config)
+ perror("gzopen /proc/config.gz");
+ return config;
+}
+
+static int config_contains(const char *pat)
+{
+ const char *msg;
+ char buf[1024];
+ gzFile config;
+ int n, err;
+
+ config = open_config();
+ if (!config)
+ return -1;
+
+ for (;;) {
+ if (!gzgets(config, buf, sizeof(buf))) {
+ msg = gzerror(config, &err);
+ if (err == Z_ERRNO)
+ perror("gzgets /proc/config.gz");
+ else if (err != Z_OK)
+ fprintf(stderr, "gzgets /proc/config.gz: %s", msg);
+ gzclose(config);
+ return -1;
+ }
+ n = strlen(buf);
+ if (buf[n - 1] == '\n')
+ buf[n - 1] = 0;
+ if (strcmp(buf, pat) == 0) {
+ gzclose(config);
+ return 1;
+ }
+ }
+ gzclose(config);
+ return 0;
+}
+
+static bool cmdline_contains(const char *pat)
{
char cmdline[4096], *c;
int fd, ret = false;
@@ -27,7 +88,7 @@ static bool get_mitigations_off(void)
cmdline[sizeof(cmdline) - 1] = '\0';
for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
- if (strncmp(c, "mitigations=off", strlen(c)))
+ if (strncmp(c, pat, strlen(c)))
continue;
ret = true;
break;
@@ -37,8 +98,21 @@ out:
return ret;
}
+static int get_mitigations_off(void)
+{
+ int enabled_in_config;
+
+ if (cmdline_contains("mitigations=off"))
+ return 1;
+ enabled_in_config = config_contains("CONFIG_CPU_MITIGATIONS=y");
+ if (enabled_in_config < 0)
+ return -1;
+ return !enabled_in_config;
+}
+
bool get_unpriv_disabled(void)
{
+ int mitigations_off;
bool disabled;
char buf[2];
FILE *fd;
@@ -52,5 +126,19 @@ bool get_unpriv_disabled(void)
disabled = true;
}
- return disabled ? true : get_mitigations_off();
+ if (disabled)
+ return true;
+
+ /*
+	 * Some unpriv tests rely on Spectre mitigations being on.
+	 * If mitigations are off, or their status can't be determined,
+	 * assume that unpriv tests are disabled.
+ */
+ mitigations_off = get_mitigations_off();
+ if (mitigations_off < 0) {
+ fprintf(stderr,
+ "Can't determine if mitigations are enabled, disabling unpriv tests.");
+ return true;
+ }
+ return mitigations_off;
}
diff --git a/tools/testing/selftests/bpf/usdt.h b/tools/testing/selftests/bpf/usdt.h
new file mode 100644
index 000000000000..549d1f774810
--- /dev/null
+++ b/tools/testing/selftests/bpf/usdt.h
@@ -0,0 +1,545 @@
+// SPDX-License-Identifier: BSD-2-Clause
+/*
+ * This single-header library defines a collection of variadic macros for
+ * defining and triggering USDTs (User Statically-Defined Tracepoints):
+ *
+ * - For USDTs without associated semaphore:
+ * USDT(group, name, args...)
+ *
+ * - For USDTs with implicit (transparent to the user) semaphore:
+ * USDT_WITH_SEMA(group, name, args...)
+ * USDT_IS_ACTIVE(group, name)
+ *
+ * - For USDTs with explicit (user-defined and provided) semaphore:
+ * USDT_WITH_EXPLICIT_SEMA(sema, group, name, args...)
+ * USDT_SEMA_IS_ACTIVE(sema)
+ *
+ * all of which emit a NOP instruction into the instruction stream, and so
+ * have *zero* overhead for the surrounding code. USDTs are identified by
+ * a combination of `group` and `name` identifiers, which is used by external
+ * tracing tooling (tracers) for identifying exact USDTs of interest.
+ *
+ * USDTs can have an associated (2-byte) activity counter (USDT semaphore),
+ * automatically maintained by Linux kernel whenever any correctly written
+ * BPF-based tracer is attached to the USDT. This USDT semaphore can be used
+ * to check whether there is a need to do any extra data collection and
+ * processing for a given USDT (if necessary), and otherwise avoid extra work
+ * for the common case of the USDT not being traced (i.e., not "active").
+ *
+ * See documentation for USDT_WITH_SEMA()/USDT_IS_ACTIVE() or
+ * USDT_WITH_EXPLICIT_SEMA()/USDT_SEMA_IS_ACTIVE() APIs below for details on
+ * working with USDTs with implicitly or explicitly associated
+ * USDT semaphores, respectively.
+ *
+ * There is also some additional data recorded into an auxiliary note
+ * section. The data in the note section describes the operands, in terms of
+ * size and location, used by tracing tooling to know where to find USDT
+ * arguments. Each location is encoded as an assembler operand string.
+ * Tracing tools (bpftrace and BPF-based tracers, systemtap, etc) insert
+ * breakpoints on top of the nop, and decode the location operand-strings,
+ * like an assembler, to find the values being passed.
+ *
+ * The operand strings are selected by the compiler for each operand.
+ * They are constrained by inline-assembler codes. The default is:
+ *
+ * #define USDT_ARG_CONSTRAINT nor
+ *
+ * This is a good default if the operands tend to be integral and
+ * moderate in number (smaller than number of registers). In other
+ * cases, the compiler may report "'asm' requires impossible reload" or
+ * similar. In this case, consider simplifying the macro call (fewer
+ * and simpler operands), reduce optimization, or override the default
+ * constraints string via:
+ *
+ * #define USDT_ARG_CONSTRAINT g
+ * #include <usdt.h>
+ *
+ * For some historical description of USDT v3 format (the one used by this
+ * library and generally recognized and assumed by BPF-based tracing tools)
+ * see [0]. The more formal specification can be found at [1]. Additional
+ * argument constraints information can be found at [2].
+ *
+ * Original SystemTap's sys/sdt.h implementation ([3]) was used as a base for
+ * this USDT library implementation. Current implementation differs *a lot* in
+ * terms of exposed user API and general usability, which was the main goal
+ * and focus of the reimplementation work. Nevertheless, underlying recorded
+ * USDT definitions are fully binary compatible and any USDT-based tooling
+ * should work equally well with USDTs defined by either SystemTap's or this
+ * library's USDT implementation.
+ *
+ * [0] https://ecos.sourceware.org/ml/systemtap/2010-q3/msg00145.html
+ * [1] https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ * [2] https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ * [3] https://sourceware.org/git/?p=systemtap.git;a=blob;f=includes/sys/sdt.h
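+ *
+ * As a minimal, purely illustrative sketch (none of these names come from
+ * this header), a semaphoreless USDT only records argument locations and
+ * can be dropped straight into hot code:
+ *
+ *   #include "usdt.h"
+ *
+ *   static void handle_request(int req_id, long payload_sz)
+ *   {
+ *       USDT(myapp, request_start, req_id, payload_sz);
+ *   }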
+ */
+#ifndef __USDT_H
+#define __USDT_H
+
+/*
+ * Changelog:
+ *
+ * 0.1.0
+ * -----
+ * - Initial release
+ */
+#define USDT_MAJOR_VERSION 0
+#define USDT_MINOR_VERSION 1
+#define USDT_PATCH_VERSION 0
+
+/* C++20 and C23 added __VA_OPT__ as a standard replacement for non-standard `##__VA_ARGS__` extension */
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ > 201710L) || (defined(__cplusplus) && __cplusplus > 201703L)
+#define __usdt_va_opt 1
+#define __usdt_va_args(...) __VA_OPT__(,) __VA_ARGS__
+#else
+#define __usdt_va_args(...) , ##__VA_ARGS__
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. No USDT semaphore is
+ * associated with this USDT.
+ *
+ * Such "semaphoreless" USDTs are commonly used when there is no extra data
+ * collection or processing needed to collect and prepare USDT arguments and
+ * they are just available in the surrounding code. USDT() macro will just
+ * record their locations in CPU registers or in memory for tracing tooling to
+ * be able to access them, if necessary.
+ */
+#ifdef __usdt_va_opt
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0 __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT(group, name, ...) \
+ __usdt_probe(group, name, __usdt_sema_none, 0, ##__VA_ARGS__)
+#endif
+
+/*
+ * Trigger USDT with `group`:`name` identifier and pass through `args` as its
+ * arguments. Zero arguments are acceptable as well. The USDT also gets an
+ * implicitly-defined associated USDT semaphore, which will be "activated" by
+ * tracing tooling and can be used to check whether USDT is being actively
+ * observed.
+ *
+ * USDTs with semaphore are commonly used when there is a need to perform
+ * additional data collection and processing to prepare USDT arguments, which
+ * otherwise might not be necessary for the rest of application logic. In such
+ * case, USDT semaphore can be used to avoid unnecessary extra work. If USDT
+ * is not traced (which is presumed to be a common situation), the associated
+ * USDT semaphore is "inactive", and so there is no need to waste resources to
+ * prepare USDT arguments. Use USDT_IS_ACTIVE(group, name) to check whether
+ * USDT is "active".
+ *
+ * N.B. There is an inherent (albeit short) gap between checking whether a USDT
+ * is active and triggering the corresponding USDT, in which an external tracer
+ * can attach to the USDT and activate its semaphore after the activity check.
+ * If such a race occurs, tracers might miss one USDT execution. Tracers are
+ * expected to accommodate such possibility and this is expected to not be
+ * a problem for applications and tracers.
+ *
+ * N.B. Implicit USDT semaphore defined by USDT_WITH_SEMA() is contained
+ * within a single executable or shared library and is not shared outside
+ * them. I.e., if you use USDT_WITH_SEMA() with the same USDT group and name
+ * identifier across executable and shared library, it will work and won't
+ * conflict, per se, but will define independent USDT semaphores, one for each
+ * shared library/executable in which USDT_WITH_SEMA(group, name) is used.
+ * That is, if you attach to this USDT in one shared library (or executable),
+ * then only USDT semaphore within that shared library (or executable) will be
+ * updated by the kernel, while other libraries (or executable) will not see
+ * activated USDT semaphore. In short, it's best to use unique USDT group:name
+ * identifiers across different shared libraries (and, equivalently, between
+ * executable and shared library). This is an advanced consideration and is
+ * rarely (if ever) seen in practice, but just to avoid surprises this is
+ * called out here. (Static libraries become a part of final executable, once
+ * linked by linker, so the above considerations don't apply to them.)
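+ *
+ * A hedged usage sketch (identifiers are illustrative only): gate the
+ * expensive argument preparation on USDT_IS_ACTIVE() and fire the probe
+ * with USDT_WITH_SEMA(), so both refer to the same implicit semaphore:
+ *
+ *   if (USDT_IS_ACTIVE(myapp, req_done)) {
+ *       struct req_stats st = collect_expensive_stats();
+ *       USDT_WITH_SEMA(myapp, req_done, st.requests, st.errors);
+ *   }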
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name) \
+ __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT_WITH_SEMA(group, name, ...) \
+ __usdt_probe(group, name, \
+ __usdt_sema_implicit, __usdt_sema_name(group, name), \
+ ##__VA_ARGS__)
+#endif
+
+struct usdt_sema { volatile unsigned short active; };
+
+/*
+ * Check if USDT with `group`:`name` identifier is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into the given USDT. It is assumed that USDT is triggered with
+ * USDT_WITH_SEMA() macro which will implicitly define associated USDT
+ * semaphore. (If one needs more control over USDT semaphore, see
+ * USDT_DEFINE_SEMA() and USDT_WITH_EXPLICIT_SEMA() macros below.)
+ *
+ * N.B. Such checks are necessarily racy and speculative. Between checking
+ * whether USDT is active and triggering the USDT itself, tracer can be
+ * detached with no notification. This race should be extremely rare and worst
+ * case should result in one-time wasted extra data collection and processing.
+ */
+#define USDT_IS_ACTIVE(group, name) ({ \
+ extern struct usdt_sema __usdt_sema_name(group, name) \
+ __usdt_asm_name(__usdt_sema_name(group, name)); \
+ __usdt_sema_implicit(__usdt_sema_name(group, name)); \
+ __usdt_sema_name(group, name).active > 0; \
+})
+
+/*
+ * APIs for working with user-defined explicit USDT semaphores.
+ *
+ * This is a less commonly used advanced API for use cases in which user needs
+ * an explicit control over (potentially shared across multiple USDTs) USDT
+ * semaphore instance. This can be used when there is a group of logically
+ * related USDTs that all need extra data collection and processing whenever
+ * any of a family of related USDTs are "activated" (i.e., traced). In such
+ * a case, all such related USDTs will be associated with the same shared USDT
+ * semaphore defined with USDT_DEFINE_SEMA() and the USDTs themselves will be
+ * triggered with USDT_WITH_EXPLICIT_SEMA() macros, taking an explicit extra
+ * USDT semaphore identifier as an extra parameter.
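+ *
+ * A hypothetical sketch of that flow (identifiers are illustrative, not
+ * part of this header): define the semaphore once, declare it wherever it
+ * is referenced, and gate all related probes on the same semaphore:
+ *
+ *   USDT_DEFINE_SEMA(req_sema);          // in exactly one .c file
+ *   USDT_DECLARE_SEMA(req_sema);         // wherever the probes fire
+ *
+ *   if (USDT_SEMA_IS_ACTIVE(req_sema)) {
+ *       USDT_WITH_EXPLICIT_SEMA(req_sema, myapp, req_start, id);
+ *       USDT_WITH_EXPLICIT_SEMA(req_sema, myapp, req_end, id);
+ *   }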
+ */
+
+/**
+ * Underlying C global variable name for user-defined USDT semaphore with
+ * `sema` identifier. Could be useful for debugging, but normally shouldn't be
+ * used explicitly.
+ */
+#define USDT_SEMA(sema) __usdt_sema_##sema
+
+/*
+ * Define storage for user-defined USDT semaphore `sema`.
+ *
+ * Should be used only once in non-header source file to let compiler allocate
+ * space for the semaphore variable. Just like with any other global variable.
+ *
+ * This macro can be used anywhere where global variable declaration is
+ * allowed. Just like with global variable definitions, there should be only
+ * one definition of user-defined USDT semaphore with given `sema` identifier,
+ * otherwise compiler or linker will complain about duplicate variable
+ * definition.
+ *
+ * For C++, it is allowed to use USDT_DEFINE_SEMA() both in global namespace
+ * and inside namespaces (including nested namespaces). Just make sure that
+ * USDT_DECLARE_SEMA() is placed within the namespace where this semaphore is
+ * referenced, or any of its parent namespaces, so the C++ language-level
+ * identifier is visible to the code that needs to reference the semaphore.
+ * At the lowest layer, USDT semaphores have global naming and visibility
+ * (they have a corresponding `__usdt_sema_<name>` symbol, which can be linked
+ * against from C or C++ code, if necessary). To keep it simple, putting
+ * USDT_DECLARE_SEMA() declarations into global namespaces is the simplest
+ * no-brainer solution. All these aspects are irrelevant for plain C, because
+ * C doesn't have namespaces and everything is always in the global namespace.
+ *
+ * N.B. Due to USDT metadata being recorded in non-allocatable ELF note
+ * section, it has limitations when it comes to relocations, which, in
+ * practice, means that it's not possible to correctly share USDT semaphores
+ * between main executable and shared libraries, or even between multiple
+ * shared libraries. USDT semaphore has to be contained to individual shared
+ * library or executable to avoid unpleasant surprises with half-working USDT
+ * semaphores. We enforce this by marking semaphore ELF symbols as having
+ * a hidden visibility. This is quite an advanced use case and consideration
+ * and for most users this should have no consequences whatsoever.
+ */
+#define USDT_DEFINE_SEMA(sema) \
+ struct usdt_sema __usdt_sema_sec USDT_SEMA(sema) \
+ __usdt_asm_name(USDT_SEMA(sema)) \
+ __attribute__((visibility("hidden"))) = { 0 }
+
+/*
+ * Declare extern reference to user-defined USDT semaphore `sema`.
+ *
+ * Refers to a variable defined in another compilation unit by
+ * USDT_DEFINE_SEMA() and allows to use the same USDT semaphore across
+ * multiple compilation units (i.e., .c and .cpp files).
+ *
+ * See USDT_DEFINE_SEMA() notes above for C++ language usage peculiarities.
+ */
+#define USDT_DECLARE_SEMA(sema) \
+ extern struct usdt_sema USDT_SEMA(sema) __usdt_asm_name(USDT_SEMA(sema))
+
+/*
+ * Check if user-defined USDT semaphore `sema` is "active" (i.e., whether it
+ * is attached to by external tracing tooling and is actively observed).
+ *
+ * This macro can be used to decide whether any additional and potentially
+ * expensive data collection or processing should be done to pass extra
+ * information into USDT(s) associated with USDT semaphore `sema`.
+ *
+ * N.B. Such checks are necessarily racy. Between checking the state of USDT
+ * semaphore and triggering associated USDT(s), the active tracer might attach
+ * or detach. This race should be extremely rare and worst case should result
+ * in one-time missed USDT event or wasted extra data collection and
+ * processing. USDT-using tracers should be written with this in mind; it is
+ * not a concern of the application defining USDTs with an associated semaphore.
+ */
+#define USDT_SEMA_IS_ACTIVE(sema) (USDT_SEMA(sema).active > 0)
+
+/*
+ * Invoke USDT specified by `group` and `name` identifiers and associate
+ * explicitly user-defined semaphore `sema` with it. Pass through `args` as
+ * USDT arguments. `args` are optional and zero arguments are acceptable.
+ *
+ * Semaphore is defined with the help of USDT_DEFINE_SEMA() macro and can be
+ * checked whether active with USDT_SEMA_IS_ACTIVE().
+ */
+#ifdef __usdt_va_opt
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+	__usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema) __VA_OPT__(,) __VA_ARGS__)
+#else
+#define USDT_WITH_EXPLICIT_SEMA(sema, group, name, ...) \
+	__usdt_probe(group, name, __usdt_sema_explicit, USDT_SEMA(sema), ##__VA_ARGS__)
+#endif
+
+/*
+ * Adjustable implementation aspects
+ */
+#ifndef USDT_ARG_CONSTRAINT
+#if defined __powerpc__
+#define USDT_ARG_CONSTRAINT nZr
+#elif defined __arm__
+#define USDT_ARG_CONSTRAINT g
+#elif defined __loongarch__
+#define USDT_ARG_CONSTRAINT nmr
+#else
+#define USDT_ARG_CONSTRAINT nor
+#endif
+#endif /* USDT_ARG_CONSTRAINT */
+
+#ifndef USDT_NOP
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define USDT_NOP nop 0
+#else
+#define USDT_NOP nop
+#endif
+#endif /* USDT_NOP */
+
+/*
+ * Implementation details
+ */
+/* USDT name for implicitly-defined USDT semaphore, derived from group:name */
+#define __usdt_sema_name(group, name) __usdt_sema_##group##__##name
+/* ELF section into which USDT semaphores are put */
+#define __usdt_sema_sec __attribute__((section(".probes")))
+
+#define __usdt_concat(a, b) a ## b
+#define __usdt_apply(fn, n) __usdt_concat(fn, n)
+
+#ifndef __usdt_nth
+#define __usdt_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, N, ...) N
+#endif
+
+#ifndef __usdt_narg
+#ifdef __usdt_va_opt
+#define __usdt_narg(...) __usdt_nth(_ __VA_OPT__(,) __VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#else
+#define __usdt_narg(...) __usdt_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
+#endif
+#endif /* __usdt_narg */
+
+#define __usdt_hash #
+#define __usdt_str_(x) #x
+#define __usdt_str(x) __usdt_str_(x)
+
+#ifndef __usdt_asm_name
+#define __usdt_asm_name(name) __asm__(__usdt_str(name))
+#endif
+
+#define __usdt_asm0() "\n"
+#define __usdt_asm1(x) __usdt_str(x) "\n"
+#define __usdt_asm2(x, ...) __usdt_str(x) "," __usdt_asm1(__VA_ARGS__)
+#define __usdt_asm3(x, ...) __usdt_str(x) "," __usdt_asm2(__VA_ARGS__)
+#define __usdt_asm4(x, ...) __usdt_str(x) "," __usdt_asm3(__VA_ARGS__)
+#define __usdt_asm5(x, ...) __usdt_str(x) "," __usdt_asm4(__VA_ARGS__)
+#define __usdt_asm6(x, ...) __usdt_str(x) "," __usdt_asm5(__VA_ARGS__)
+#define __usdt_asm7(x, ...) __usdt_str(x) "," __usdt_asm6(__VA_ARGS__)
+#define __usdt_asm8(x, ...) __usdt_str(x) "," __usdt_asm7(__VA_ARGS__)
+#define __usdt_asm9(x, ...) __usdt_str(x) "," __usdt_asm8(__VA_ARGS__)
+#define __usdt_asm10(x, ...) __usdt_str(x) "," __usdt_asm9(__VA_ARGS__)
+#define __usdt_asm11(x, ...) __usdt_str(x) "," __usdt_asm10(__VA_ARGS__)
+#define __usdt_asm12(x, ...) __usdt_str(x) "," __usdt_asm11(__VA_ARGS__)
+#define __usdt_asm(...) __usdt_apply(__usdt_asm, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#ifdef __LP64__
+#define __usdt_asm_addr .8byte
+#else
+#define __usdt_asm_addr .4byte
+#endif
+
+#define __usdt_asm_strz_(x) __usdt_asm1(.asciz #x)
+#define __usdt_asm_strz(x) __usdt_asm_strz_(x)
+#define __usdt_asm_str_(x) __usdt_asm1(.ascii #x)
+#define __usdt_asm_str(x) __usdt_asm_str_(x)
+
+/* "semaphoreless" USDT case */
+#ifndef __usdt_sema_none
+#define __usdt_sema_none(sema)
+#endif
+
+/* implicitly defined __usdt_sema__group__name semaphore (using weak symbols) */
+#ifndef __usdt_sema_implicit
+#define __usdt_sema_implicit(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm1(.ifndef sema) \
+ __usdt_asm3( .pushsection .probes, "aw", "progbits") \
+ __usdt_asm1( .weak sema) \
+ __usdt_asm1( .hidden sema) \
+ __usdt_asm1( .align 2) \
+ __usdt_asm1(sema:) \
+ __usdt_asm1( .zero 2) \
+ __usdt_asm2( .type sema, @object) \
+ __usdt_asm2( .size sema, 2) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ );
+#endif
+
+/* externally defined semaphore using USDT_DEFINE_SEMA() and passed explicitly by user */
+#ifndef __usdt_sema_explicit
+#define __usdt_sema_explicit(sema) \
+ __asm__ __volatile__ ("" :: "m" (sema));
+#endif
+
+/* main USDT definition (nop and .note.stapsdt metadata) */
+#define __usdt_probe(group, name, sema_def, sema, ...) do { \
+ sema_def(sema) \
+ __asm__ __volatile__ ( \
+ __usdt_asm( 990: USDT_NOP) \
+ __usdt_asm3( .pushsection .note.stapsdt, "", "note") \
+ __usdt_asm1( .balign 4) \
+ __usdt_asm3( .4byte 992f-991f,994f-993f,3) \
+ __usdt_asm1(991: .asciz "stapsdt") \
+ __usdt_asm1(992: .balign 4) \
+ __usdt_asm1(993: __usdt_asm_addr 990b) \
+ __usdt_asm1( __usdt_asm_addr _.stapsdt.base) \
+ __usdt_asm1( __usdt_asm_addr sema) \
+ __usdt_asm_strz(group) \
+ __usdt_asm_strz(name) \
+ __usdt_asm_args(__VA_ARGS__) \
+ __usdt_asm1( .ascii "\0") \
+ __usdt_asm1(994: .balign 4) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.ifndef _.stapsdt.base) \
+ __usdt_asm5( .pushsection .stapsdt.base,"aG","progbits",.stapsdt.base,comdat)\
+ __usdt_asm1( .weak _.stapsdt.base) \
+ __usdt_asm1( .hidden _.stapsdt.base) \
+ __usdt_asm1(_.stapsdt.base:) \
+ __usdt_asm1( .space 1) \
+ __usdt_asm2( .size _.stapsdt.base, 1) \
+ __usdt_asm1( .popsection) \
+ __usdt_asm1(.endif) \
+ :: __usdt_asm_ops(__VA_ARGS__) \
+ ); \
+} while (0)
+
+/*
+ * NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ * operand note format.
+ *
+ * The named register may be a longer or shorter (!) alias for the
+ * storage where the value in question is found. For example, on
+ * i386, a 64-bit value may be put in a register pair, and the register
+ * name stored would identify just one of them. Previously, gcc was
+ * asked to emit %w[id] (the 16-bit alias of some register holding an
+ * operand), even when a wider 32-bit value was used.
+ *
+ * Bottom line: the byte-width given before the @ sign governs. If
+ * there is a mismatch between that width and that of the named
+ * register, then a sys/sdt.h note consumer may need to employ
+ * architecture-specific heuristics to figure out where the compiler
+ * has actually put the complete value.
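+ *
+ * As an illustrative (not build-specific) example of the rule above, an
+ * argument spec of "-4@%eax" denotes a signed 4-byte value currently in
+ * %eax, while "8@-8(%rbp)" denotes an unsigned 8-byte value at offset -8
+ * from the frame pointer; the size/sign before the '@' is authoritative,
+ * not the named register's natural width.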
+ */
+#if defined(__powerpc__) || defined(__powerpc64__)
+#define __usdt_argref(id) %I[id]%[id]
+#elif defined(__i386__)
+#define __usdt_argref(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+#define __usdt_argref(id) %[id]
+#endif
+
+#define __usdt_asm_arg(n) __usdt_asm_str(%c[__usdt_asz##n]) \
+ __usdt_asm1(.ascii "@") \
+ __usdt_asm_str(__usdt_argref(__usdt_aval##n))
+
+#define __usdt_asm_args0 /* no arguments */
+#define __usdt_asm_args1 __usdt_asm_arg(1)
+#define __usdt_asm_args2 __usdt_asm_args1 __usdt_asm1(.ascii " ") __usdt_asm_arg(2)
+#define __usdt_asm_args3 __usdt_asm_args2 __usdt_asm1(.ascii " ") __usdt_asm_arg(3)
+#define __usdt_asm_args4 __usdt_asm_args3 __usdt_asm1(.ascii " ") __usdt_asm_arg(4)
+#define __usdt_asm_args5 __usdt_asm_args4 __usdt_asm1(.ascii " ") __usdt_asm_arg(5)
+#define __usdt_asm_args6 __usdt_asm_args5 __usdt_asm1(.ascii " ") __usdt_asm_arg(6)
+#define __usdt_asm_args7 __usdt_asm_args6 __usdt_asm1(.ascii " ") __usdt_asm_arg(7)
+#define __usdt_asm_args8 __usdt_asm_args7 __usdt_asm1(.ascii " ") __usdt_asm_arg(8)
+#define __usdt_asm_args9 __usdt_asm_args8 __usdt_asm1(.ascii " ") __usdt_asm_arg(9)
+#define __usdt_asm_args10 __usdt_asm_args9 __usdt_asm1(.ascii " ") __usdt_asm_arg(10)
+#define __usdt_asm_args11 __usdt_asm_args10 __usdt_asm1(.ascii " ") __usdt_asm_arg(11)
+#define __usdt_asm_args12 __usdt_asm_args11 __usdt_asm1(.ascii " ") __usdt_asm_arg(12)
+#define __usdt_asm_args(...) __usdt_apply(__usdt_asm_args, __usdt_narg(__VA_ARGS__))
+
+#define __usdt_is_arr(x) (__builtin_classify_type(x) == 14 || __builtin_classify_type(x) == 5)
+#define __usdt_arg_size(x) (__usdt_is_arr(x) ? sizeof(void *) : sizeof(x))
+
+/*
+ * We can't use __builtin_choose_expr() in C++, so fall back to table-based
+ * signedness determination for known types, utilizing templates magic.
+ */
+#ifdef __cplusplus
+
+#define __usdt_is_signed(x) (!__usdt_is_arr(x) && __usdt_t<__typeof(x)>::is_signed)
+
+#include <cstddef>
+
+template<typename T> struct __usdt_t { static const bool is_signed = false; };
+template<typename A> struct __usdt_t<A[]> : public __usdt_t<A *> {};
+template<typename A, size_t N> struct __usdt_t<A[N]> : public __usdt_t<A *> {};
+
+#define __usdt_def_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = true; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = true; }
+#define __usdt_maybe_signed(T) \
+template<> struct __usdt_t<T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<volatile T> { static const bool is_signed = (T)-1 < (T)1; }; \
+template<> struct __usdt_t<const volatile T> { static const bool is_signed = (T)-1 < (T)1; }
+
+__usdt_def_signed(signed char);
+__usdt_def_signed(short);
+__usdt_def_signed(int);
+__usdt_def_signed(long);
+__usdt_def_signed(long long);
+__usdt_maybe_signed(char);
+__usdt_maybe_signed(wchar_t);
+
+#else /* !__cplusplus */
+
+#define __usdt_is_inttype(x) (__builtin_classify_type(x) >= 1 && __builtin_classify_type(x) <= 4)
+#define __usdt_inttype(x) __typeof(__builtin_choose_expr(__usdt_is_inttype(x), (x), 0U))
+#define __usdt_is_signed(x) ((__usdt_inttype(x))-1 < (__usdt_inttype(x))1)
+
+#endif /* __cplusplus */
+
+#define __usdt_asm_op(n, x) \
+ [__usdt_asz##n] "n" ((__usdt_is_signed(x) ? (int)-1 : 1) * (int)__usdt_arg_size(x)), \
+ [__usdt_aval##n] __usdt_str(USDT_ARG_CONSTRAINT)(x)
+
+#define __usdt_asm_ops0() [__usdt_dummy] "g" (0)
+#define __usdt_asm_ops1(x) __usdt_asm_op(1, x)
+#define __usdt_asm_ops2(a,x) __usdt_asm_ops1(a), __usdt_asm_op(2, x)
+#define __usdt_asm_ops3(a,b,x) __usdt_asm_ops2(a,b), __usdt_asm_op(3, x)
+#define __usdt_asm_ops4(a,b,c,x) __usdt_asm_ops3(a,b,c), __usdt_asm_op(4, x)
+#define __usdt_asm_ops5(a,b,c,d,x) __usdt_asm_ops4(a,b,c,d), __usdt_asm_op(5, x)
+#define __usdt_asm_ops6(a,b,c,d,e,x) __usdt_asm_ops5(a,b,c,d,e), __usdt_asm_op(6, x)
+#define __usdt_asm_ops7(a,b,c,d,e,f,x) __usdt_asm_ops6(a,b,c,d,e,f), __usdt_asm_op(7, x)
+#define __usdt_asm_ops8(a,b,c,d,e,f,g,x) __usdt_asm_ops7(a,b,c,d,e,f,g), __usdt_asm_op(8, x)
+#define __usdt_asm_ops9(a,b,c,d,e,f,g,h,x) __usdt_asm_ops8(a,b,c,d,e,f,g,h), __usdt_asm_op(9, x)
+#define __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,x) __usdt_asm_ops9(a,b,c,d,e,f,g,h,i), __usdt_asm_op(10, x)
+#define __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,x) __usdt_asm_ops10(a,b,c,d,e,f,g,h,i,j), __usdt_asm_op(11, x)
+#define __usdt_asm_ops12(a,b,c,d,e,f,g,h,i,j,k,x) __usdt_asm_ops11(a,b,c,d,e,f,g,h,i,j,k), __usdt_asm_op(12, x)
+#define __usdt_asm_ops(...) __usdt_apply(__usdt_asm_ops, __usdt_narg(__VA_ARGS__))(__VA_ARGS__)
+
+#endif /* __USDT_H */
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
index b616575c3b00..ce13002c7a19 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -93,7 +93,7 @@
.expected_attach_type = BPF_SK_LOOKUP,
.result = VERBOSE_ACCEPT,
.runs = -1,
- .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8=-44\
2: (c5) if r0 s< 0x0 goto pc+2\
- R0_w=-44",
+ R0=-44",
},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 7afc2619ab14..c8d640802cce 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1375,7 +1375,7 @@
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -16),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
/* write into map value */
@@ -1439,7 +1439,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -1493,7 +1493,7 @@
/* second time with fp-16 */
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- /* fetch secound map_value_ptr from the stack */
+ /* fetch second map_value_ptr from the stack */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
/* write into map value */
BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
@@ -2252,7 +2252,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_48b = { 7 },
- .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
+ .errstr_unpriv = "invalid read from stack R2 off -8+0 size 8",
.result_unpriv = REJECT,
/* in privileged mode reads from uninitialized stack locations are permitted */
.result = ACCEPT,
@@ -2380,7 +2380,7 @@
*/
BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
- /* r9 = *r9 ; verifier get's to this point via two paths:
+ /* r9 = *r9 ; verifier gets to this point via two paths:
* ; (I) one including r9 = r8, verified first;
* ; (II) one excluding r9 = r8, verified next.
* ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
@@ -2409,3 +2409,27 @@
.errstr_unpriv = "",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
+{
+ "calls: several args with ref_obj_id",
+ .insns = {
+ /* Reserve at least sizeof(struct iphdr) bytes in the ring buffer.
+ * With a smaller size, the verifier would reject the call to
+ * bpf_tcp_raw_gen_syncookie_ipv4 before we can reach the
+ * ref_obj_id error.
+ */
+ BPF_MOV64_IMM(BPF_REG_2, 20),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ /* if r0 == 0 goto <exit> */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tcp_raw_gen_syncookie_ipv4),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 2 },
+ .result = REJECT,
+ .errstr = "more than one arg with ref_obj_id",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index ee454327e5c6..77207b498c6f 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -2,14 +2,13 @@
"dead code: start",
.insns = {
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 7),
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index 43776f6f92f4..91d83e9cb148 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -84,11 +84,10 @@
BPF_JMP32_IMM(BPF_JSET, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -149,11 +148,10 @@
BPF_JMP32_IMM(BPF_JEQ, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSGE, BPF_REG_7, 0xf, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -214,11 +212,10 @@
BPF_JMP32_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x10, 1),
BPF_EXIT_INSN(),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -283,11 +280,10 @@
BPF_JMP32_REG(BPF_JGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -354,11 +350,10 @@
BPF_JMP32_REG(BPF_JGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -425,11 +420,10 @@
BPF_JMP32_REG(BPF_JLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -496,11 +490,10 @@
BPF_JMP32_REG(BPF_JLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -567,11 +560,10 @@
BPF_JMP32_REG(BPF_JSGE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -638,11 +630,10 @@
BPF_JMP32_REG(BPF_JSGT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, -2, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -709,11 +700,10 @@
BPF_JMP32_REG(BPF_JSLE, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSLE, BPF_REG_7, 0x7ffffff0, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
@@ -780,11 +770,10 @@
BPF_JMP32_REG(BPF_JSLT, BPF_REG_7, BPF_REG_8, 1),
BPF_EXIT_INSN(),
BPF_JMP32_IMM(BPF_JSLT, BPF_REG_7, -1, 1),
+ /* unpriv: nospec (inserted to prevent "R0 invalid mem access 'scalar'") */
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "R0 invalid mem access 'scalar'",
- .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 11fc68da735e..e901eefd774a 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -78,12 +78,11 @@
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 1, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -136,13 +135,12 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
BPF_ALU64_IMM(BPF_OR, BPF_REG_0, 2),
BPF_JMP_IMM(BPF_JSET, BPF_REG_0, 3, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -154,16 +152,16 @@
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xff),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0xf0, 3),
BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSET, BPF_REG_1, 0x10, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0x10, 1),
+ /* unpriv: nospec (inserted to prevent "R9 !read_ok") */
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "R9 !read_ok",
- .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
index f420c0312aa0..4b39f8472f9b 100644
--- a/tools/testing/selftests/bpf/verifier/map_kptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -373,7 +373,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_kptr = { 1 },
.result = REJECT,
- .errstr = "Unreleased reference id=5 alloc_insn=20",
+ .errstr = "Unreleased reference id=4 alloc_insn=20",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_acquire", 15 },
}
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
index f2cac42298ba..09179fb551f0 100755
--- a/tools/testing/selftests/bpf/verify_sig_setup.sh
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -32,7 +32,7 @@ usage()
exit 1
}
-setup()
+genkey()
{
local tmp_dir="$1"
@@ -45,9 +45,14 @@ setup()
openssl x509 -in ${tmp_dir}/signing_key.pem -out \
${tmp_dir}/signing_key.der -outform der
+}
- key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+setup()
+{
+ local tmp_dir="$1"
+ genkey "${tmp_dir}"
+ key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
keyring_id=$(keyctl newring ebpf_testing_keyring @s)
keyctl link $key_id $keyring_id
}
@@ -105,6 +110,8 @@ main()
if [[ "${action}" == "setup" ]]; then
setup "${tmp_dir}"
+ elif [[ "${action}" == "genkey" ]]; then
+ genkey "${tmp_dir}"
elif [[ "${action}" == "cleanup" ]]; then
cleanup "${tmp_dir}"
elif [[ "${action}" == "fsverity-create-sign" ]]; then
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index e12ef953fba8..e962f133250c 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -3,6 +3,7 @@
#define _GNU_SOURCE
#include <argp.h>
#include <libgen.h>
+#include <ctype.h>
#include <string.h>
#include <stdlib.h>
#include <sched.h>
@@ -21,11 +22,21 @@
#include <gelf.h>
#include <float.h>
#include <math.h>
+#include <limits.h>
+#include <assert.h>
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#endif
+#ifndef max
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
enum stat_id {
VERDICT,
DURATION,
@@ -34,6 +45,12 @@ enum stat_id {
PEAK_STATES,
MAX_STATES_PER_INSN,
MARK_READ_MAX_LEN,
+ SIZE,
+ JITED_SIZE,
+ STACK,
+ PROG_TYPE,
+ ATTACH_TYPE,
+ MEMORY_PEAK,
FILE_NAME,
PROG_NAME,
@@ -140,6 +157,36 @@ struct filter {
bool abs;
};
+struct rvalue {
+ enum { INTEGRAL, ENUMERATOR } type;
+ union {
+ long long ivalue;
+ char *svalue;
+ };
+};
+
+struct field_access {
+ enum { FIELD_NAME, ARRAY_INDEX } type;
+ union {
+ char *name;
+ struct rvalue index;
+ };
+};
+
+struct var_preset {
+ struct field_access *atoms;
+ int atom_count;
+ char *full_name;
+ struct rvalue value;
+ bool applied;
+};
+
+enum dump_mode {
+ DUMP_NONE = 0,
+ DUMP_XLATED = 1,
+ DUMP_JITED = 2,
+};
+
static struct env {
char **filenames;
int filename_cnt;
@@ -181,6 +228,12 @@ static struct env {
int progs_processed;
int progs_skipped;
int top_src_lines;
+ struct var_preset *presets;
+ int npresets;
+ char orig_cgroup[PATH_MAX];
+ char stat_cgroup[PATH_MAX];
+ int memory_peak_fd;
+ __u32 dump_mode;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -192,6 +245,22 @@ static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va
return vfprintf(stderr, format, args);
}
+#define log_errno(fmt, ...) log_errno_aux(__FILE__, __LINE__, fmt, ##__VA_ARGS__)
+
+__attribute__((format(printf, 3, 4)))
+static int log_errno_aux(const char *file, int line, const char *fmt, ...)
+{
+ int err = -errno;
+ va_list ap;
+
+ va_start(ap, fmt);
+ fprintf(stderr, "%s:%d: ", file, line);
+ vfprintf(stderr, fmt, ap);
+ fprintf(stderr, " failed with error '%s'.\n", strerror(errno));
+ va_end(ap);
+ return err;
+}
+
#ifndef VERISTAT_VERSION
#define VERISTAT_VERSION "<kernel>"
#endif
@@ -203,11 +272,13 @@ const char argp_program_doc[] =
"\n"
"USAGE: veristat <obj-file> [<obj-file>...]\n"
" OR: veristat -C <baseline.csv> <comparison.csv>\n"
-" OR: veristat -R <results.csv>\n";
+" OR: veristat -R <results.csv>\n"
+" OR: veristat -vl2 <to_analyze.bpf.o>\n";
enum {
OPT_LOG_FIXED = 1000,
OPT_LOG_SIZE = 1001,
+ OPT_DUMP = 1002,
};
static const struct argp_option opts[] = {
@@ -215,7 +286,7 @@ static const struct argp_option opts[] = {
{ "version", 'V', NULL, 0, "Print version" },
{ "verbose", 'v', NULL, 0, "Verbose mode" },
{ "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
- { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
+ { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode, 2 for full verification log)" },
{ "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
{ "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
{ "top-n", 'n', "N", 0, "Emit only up to first N results." },
@@ -231,16 +302,21 @@ static const struct argp_option opts[] = {
{ "test-reg-invariants", 'r', NULL, 0,
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
{ "top-src-lines", 'S', "N", 0, "Emit N most frequent source code lines" },
+ { "set-global-vars", 'G', "GLOBAL", 0, "Set global variables provided in the expression, for example \"var1 = 1\"" },
+ { "dump", OPT_DUMP, "DUMP_MODE", OPTION_ARG_OPTIONAL, "Print BPF program dump (xlated, jited)" },
{},
};
static int parse_stats(const char *stats_str, struct stat_specs *specs);
static int append_filter(struct filter **filters, int *cnt, const char *str);
static int append_filter_file(const char *path);
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr);
+static int append_var_preset_file(const char *filename);
+static int append_file(const char *path);
+static int append_file_from_file(const char *path);
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
- void *tmp;
int err;
switch (key) {
@@ -312,6 +388,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
fprintf(stderr, "invalid top N specifier: %s\n", arg);
argp_usage(state);
}
+ break;
case 'C':
env.comparison_mode = true;
break;
@@ -338,15 +415,36 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
argp_usage(state);
}
break;
+ case 'G': {
+ if (arg[0] == '@')
+ err = append_var_preset_file(arg + 1);
+ else
+ err = append_var_preset(&env.presets, &env.npresets, arg);
+ if (err) {
+ fprintf(stderr, "Failed to parse global variable presets: %s\n", arg);
+ return err;
+ }
+ break;
+ }
case ARGP_KEY_ARG:
- tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
- if (!tmp)
- return -ENOMEM;
- env.filenames = tmp;
- env.filenames[env.filename_cnt] = strdup(arg);
- if (!env.filenames[env.filename_cnt])
- return -ENOMEM;
- env.filename_cnt++;
+ if (arg[0] == '@')
+ err = append_file_from_file(arg + 1);
+ else
+ err = append_file(arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect BPF object files: %d\n", err);
+ return err;
+ }
+ break;
+ case OPT_DUMP:
+ if (!arg || strcasecmp(arg, "xlated") == 0) {
+ env.dump_mode |= DUMP_XLATED;
+ } else if (strcasecmp(arg, "jited") == 0) {
+ env.dump_mode |= DUMP_JITED;
+ } else {
+ fprintf(stderr, "Unrecognized dump mode '%s'\n", arg);
+ return -EINVAL;
+ }
break;
default:
return ARGP_ERR_UNKNOWN;
@@ -617,7 +715,7 @@ static int append_filter_file(const char *path)
f = fopen(path, "r");
if (!f) {
err = -errno;
- fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
+ fprintf(stderr, "Failed to open filters in '%s': %s\n", path, strerror(-err));
return err;
}
@@ -640,19 +738,64 @@ cleanup:
}
static const struct stat_specs default_output_spec = {
- .spec_cnt = 7,
+ .spec_cnt = 8,
.ids = {
FILE_NAME, PROG_NAME, VERDICT, DURATION,
- TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+ TOTAL_INSNS, TOTAL_STATES, SIZE, JITED_SIZE
},
};
+static int append_file(const char *path)
+{
+ void *tmp;
+
+ tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+ if (!tmp)
+ return -ENOMEM;
+ env.filenames = tmp;
+ env.filenames[env.filename_cnt] = strdup(path);
+ if (!env.filenames[env.filename_cnt])
+ return -ENOMEM;
+ env.filename_cnt++;
+ return 0;
+}
+
+static int append_file_from_file(const char *path)
+{
+ char buf[1024];
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open object files list in '%s': %s\n",
+ path, strerror(errno));
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ err = append_file(buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
static const struct stat_specs default_csv_output_spec = {
- .spec_cnt = 9,
+ .spec_cnt = 15,
.ids = {
FILE_NAME, PROG_NAME, VERDICT, DURATION,
TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
+ SIZE, JITED_SIZE, PROG_TYPE, ATTACH_TYPE,
+ STACK, MEMORY_PEAK,
},
};
@@ -688,6 +831,12 @@ static struct stat_def {
[PEAK_STATES] = { "Peak states", {"peak_states"}, },
[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
+ [SIZE] = { "Program size", {"prog_size"}, },
+ [JITED_SIZE] = { "Jited size", {"prog_size_jited"}, },
+ [STACK] = {"Stack depth", {"stack_depth", "stack"}, },
+ [PROG_TYPE] = { "Program type", {"prog_type"}, },
+ [ATTACH_TYPE] = { "Attach type", {"attach_type", }, },
+ [MEMORY_PEAK] = { "Peak memory (MiB)", {"mem_peak", }, },
};
static bool parse_stat_id_var(const char *name, size_t len, int *id,
@@ -761,6 +910,18 @@ static bool is_desc_sym(char c)
return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
}
+static char *rtrim(char *str)
+{
+ int i;
+
+ for (i = strlen(str) - 1; i > 0; --i) {
+ if (!isspace(str[i]))
+ break;
+ str[i] = '\0';
+ }
+ return str;
+}
+
static int parse_stat(const char *stat_name, struct stat_specs *specs)
{
int id;
@@ -835,7 +996,8 @@ static char verif_log_buf[64 * 1024];
static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
{
const char *cur;
- int pos, lines;
+ int pos, lines, sub_stack, cnt = 0;
+ char *state = NULL, *token, stack[512];
buf[buf_sz - 1] = '\0';
@@ -853,15 +1015,22 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
continue;
- if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
+ if (5 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
&s->stats[TOTAL_INSNS],
&s->stats[MAX_STATES_PER_INSN],
&s->stats[TOTAL_STATES],
&s->stats[PEAK_STATES],
&s->stats[MARK_READ_MAX_LEN]))
continue;
- }
+ if (1 == sscanf(cur, "stack depth %511s", stack))
+ continue;
+ }
+ while ((token = strtok_r(cnt++ ? NULL : stack, "+", &state))) {
+ if (sscanf(token, "%d", &sub_stack) == 0)
+ break;
+ s->stats[STACK] += sub_stack;
+ }
return 0;
}
@@ -884,7 +1053,7 @@ static int line_cnt_cmp(const void *a, const void *b)
const struct line_cnt *b_cnt = (const struct line_cnt *)b;
if (a_cnt->cnt != b_cnt->cnt)
- return a_cnt->cnt < b_cnt->cnt ? -1 : 1;
+ return a_cnt->cnt > b_cnt->cnt ? -1 : 1;
return strcmp(a_cnt->line, b_cnt->line);
}
@@ -1032,6 +1201,41 @@ static int guess_prog_type_by_ctx_name(const char *ctx_name,
return -ESRCH;
}
+/* Make sure only target program is referenced from struct_ops map,
+ * otherwise libbpf would automatically set autocreate for all
+ * referenced programs.
+ * See libbpf.c:bpf_object_adjust_struct_ops_autoload.
+ */
+static void mask_unrelated_struct_ops_progs(struct bpf_object *obj,
+ struct bpf_map *map,
+ struct bpf_program *prog)
+{
+ struct btf *btf = bpf_object__btf(obj);
+ const struct btf_type *t, *mt;
+ struct btf_member *m;
+ int i, moff;
+ size_t data_sz, ptr_sz = sizeof(void *);
+ void *data;
+
+ t = btf__type_by_id(btf, bpf_map__btf_value_type_id(map));
+ if (!btf_is_struct(t))
+ return;
+
+ data = bpf_map__initial_value(map, &data_sz);
+ for (i = 0; i < btf_vlen(t); i++) {
+ m = &btf_members(t)[i];
+ mt = btf__type_by_id(btf, m->type);
+ if (!btf_is_ptr(mt))
+ continue;
+ moff = m->offset / 8;
+ if (moff + ptr_sz > data_sz)
+ continue;
+ if (memcmp(data + moff, &prog, ptr_sz) == 0)
+ continue;
+ memset(data + moff, 0, ptr_sz);
+ }
+}
+
static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
{
struct bpf_map *map;
@@ -1046,6 +1250,10 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
case BPF_MAP_TYPE_TASK_STORAGE:
case BPF_MAP_TYPE_INODE_STORAGE:
case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_CGRP_STORAGE:
+ break;
+ case BPF_MAP_TYPE_STRUCT_OPS:
+ mask_unrelated_struct_ops_progs(obj, map, prog);
break;
default:
if (bpf_map__max_entries(map) == 0)
@@ -1095,13 +1303,13 @@ static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const ch
bpf_program__set_expected_attach_type(prog, attach_type);
if (!env.quiet) {
- printf("Using guessed program type '%s' for %s/%s...\n",
+ fprintf(stderr, "Using guessed program type '%s' for %s/%s...\n",
libbpf_bpf_prog_type_str(prog_type),
filename, prog_name);
}
} else {
if (!env.quiet) {
- printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ fprintf(stderr, "Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
ctx_name, filename, prog_name);
}
}
@@ -1139,15 +1347,275 @@ static int max_verifier_log_size(void)
return log_size;
}
+static bool output_stat_enabled(int id)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++)
+ if (env.output_spec.ids[i] == id)
+ return true;
+ return false;
+}
+
+__attribute__((format(printf, 2, 3)))
+static int write_one_line(const char *file, const char *fmt, ...)
+{
+ int err, saved_errno;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "w");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ errno = 0;
+ err = vfprintf(f, fmt, ap);
+ saved_errno = errno;
+ va_end(ap);
+ fclose(f);
+ errno = saved_errno;
+ return err < 0 ? -1 : 0;
+}
+
+__attribute__((format(scanf, 3, 4)))
+static int scanf_one_line(const char *file, int fields_expected, const char *fmt, ...)
+{
+ int res = 0, saved_errno = 0;
+ char *line = NULL;
+ size_t line_len;
+ va_list ap;
+ FILE *f;
+
+ f = fopen(file, "r");
+ if (!f)
+ return -1;
+
+ va_start(ap, fmt);
+ while (getline(&line, &line_len, f) > 0) {
+ res = vsscanf(line, fmt, ap);
+ if (res == fields_expected)
+ goto out;
+ }
+ if (ferror(f)) {
+ saved_errno = errno;
+ res = -1;
+ }
+
+out:
+ va_end(ap);
+ free(line);
+ fclose(f);
+ errno = saved_errno;
+ return res;
+}
+
+static void destroy_stat_cgroup(void)
+{
+ char buf[PATH_MAX];
+ int err;
+
+ close(env.memory_peak_fd);
+
+ if (env.orig_cgroup[0]) {
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.orig_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0)
+ log_errno("moving self to original cgroup %s\n", env.orig_cgroup);
+ }
+
+ if (env.stat_cgroup[0]) {
+ err = rmdir(env.stat_cgroup);
+ if (err < 0)
+ log_errno("deletion of cgroup %s", env.stat_cgroup);
+ }
+
+ env.memory_peak_fd = -1;
+ env.orig_cgroup[0] = 0;
+ env.stat_cgroup[0] = 0;
+}
+
+/*
+ * Creates a cgroup at /sys/fs/cgroup/veristat-accounting-<pid>,
+ * and moves the current process into it.
+ */
+static void create_stat_cgroup(void)
+{
+ char cgroup_fs_mount[4096];
+ char buf[4096];
+ int err;
+
+ env.memory_peak_fd = -1;
+
+ if (!output_stat_enabled(MEMORY_PEAK))
+ return;
+
+ err = scanf_one_line("/proc/self/mounts", 2, "%*s %4095s cgroup2 %s",
+ cgroup_fs_mount, buf);
+ if (err != 2) {
+ if (err < 0)
+ log_errno("reading /proc/self/mounts");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't find cgroupfs v2 mount point.\n");
+ goto err_out;
+ }
+
+ /* cgroup-v2.rst promises the line "0::<group>" for cgroups v2 */
+ err = scanf_one_line("/proc/self/cgroup", 1, "0::%4095s", buf);
+ if (err != 1) {
+ if (err < 0)
+ log_errno("reading /proc/self/cgroup");
+ else if (!env.quiet)
+ fprintf(stderr, "Can't infer veristat process cgroup.");
+ goto err_out;
+ }
+
+ snprintf(env.orig_cgroup, sizeof(env.orig_cgroup), "%s/%s", cgroup_fs_mount, buf);
+
+ snprintf(buf, sizeof(buf), "%s/veristat-accounting-%d", cgroup_fs_mount, getpid());
+ err = mkdir(buf, 0777);
+ if (err < 0) {
+ log_errno("creation of cgroup %s", buf);
+ goto err_out;
+ }
+ strcpy(env.stat_cgroup, buf);
+
+ snprintf(buf, sizeof(buf), "%s/cgroup.procs", env.stat_cgroup);
+ err = write_one_line(buf, "%d\n", getpid());
+ if (err < 0) {
+ log_errno("entering cgroup %s", buf);
+ goto err_out;
+ }
+
+ snprintf(buf, sizeof(buf), "%s/memory.peak", env.stat_cgroup);
+ env.memory_peak_fd = open(buf, O_RDWR | O_APPEND);
+ if (env.memory_peak_fd < 0) {
+ log_errno("opening %s", buf);
+ goto err_out;
+ }
+
+ return;
+
+err_out:
+ if (!env.quiet)
+ fprintf(stderr, "Memory usage metric unavailable.\n");
+ destroy_stat_cgroup();
+}
+
+/* Current value of /sys/fs/cgroup/veristat-accounting-<pid>/memory.peak */
+static long cgroup_memory_peak(void)
+{
+ long err, memory_peak;
+ char buf[32];
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pread(env.memory_peak_fd, buf, sizeof(buf) - 1, 0);
+ if (err <= 0) {
+ log_errno("pread(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+
+ buf[err] = 0;
+ errno = 0;
+ memory_peak = strtoll(buf, NULL, 10);
+ if (errno) {
+ log_errno("%s/memory.peak:strtoll(%s)", env.stat_cgroup, buf);
+ return -1;
+ }
+
+ return memory_peak;
+}
+
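+/*
+ * Reset the memory.peak watermark of the accounting cgroup. On kernels that
+ * support resetting it, writing any non-empty string does the job; the "r\n"
+ * payload is arbitrary.
+ */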
+static int reset_stat_cgroup(void)
+{
+ char buf[] = "r\n";
+ int err;
+
+ if (env.memory_peak_fd < 0)
+ return -1;
+
+ err = pwrite(env.memory_peak_fd, buf, sizeof(buf), 0);
+ if (err <= 0) {
+ log_errno("pwrite(%s/memory.peak)", env.stat_cgroup);
+ return -1;
+ }
+ return 0;
+}
+
+static int parse_rvalue(const char *val, struct rvalue *rvalue)
+{
+ long long value;
+ char *val_end;
+
+ if (val[0] == '-' || isdigit(val[0])) {
+ /* must be a number */
+ errno = 0;
+ value = strtoll(val, &val_end, 0);
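+ /* on signed overflow retry as unsigned, so values above LLONG_MAX still parse */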
+ if (errno == ERANGE) {
+ errno = 0;
+ value = strtoull(val, &val_end, 0);
+ }
+ if (errno || *val_end != '\0') {
+ fprintf(stderr, "Failed to parse value '%s'\n", val);
+ return -EINVAL;
+ }
+ rvalue->ivalue = value;
+ rvalue->type = INTEGRAL;
+ } else {
+ /* if not a number, consider it enum value */
+ rvalue->svalue = strdup(val);
+ if (!rvalue->svalue)
+ return -ENOMEM;
+ rvalue->type = ENUMERATOR;
+ }
+ return 0;
+}
+
+static void dump(__u32 prog_id, enum dump_mode mode, const char *file_name, const char *prog_name)
+{
+ char command[64], buf[4096];
+ FILE *fp;
+ int status;
+
+ status = system("command -v bpftool > /dev/null 2>&1");
+ if (status != 0) {
+ fprintf(stderr, "bpftool is not available, can't print program dump\n");
+ return;
+ }
+ snprintf(command, sizeof(command), "bpftool prog dump %s id %u",
+ mode == DUMP_JITED ? "jited" : "xlated", prog_id);
+ fp = popen(command, "r");
+ if (!fp) {
+ fprintf(stderr, "bpftool failed with error: %d\n", errno);
+ return;
+ }
+
+ printf("DUMP (%s) %s/%s:\n", mode == DUMP_JITED ? "JITED" : "XLATED", file_name, prog_name);
+ while (fgets(buf, sizeof(buf), fp))
+ fputs(buf, stdout);
+ fprintf(stdout, "\n");
+
+ if (ferror(fp))
+ fprintf(stderr, "Failed to dump BPF prog with error: %d\n", errno);
+
+ pclose(fp);
+}
+
static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
{
const char *base_filename = basename(strdupa(filename));
const char *prog_name = bpf_program__name(prog);
+ long mem_peak_a, mem_peak_b, mem_peak = -1;
char *buf;
int buf_sz, log_level;
struct verif_stats *stats;
- int err = 0;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ int err = 0, cgroup_err;
void *tmp;
+ int fd;
if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
env.progs_skipped++;
@@ -1190,12 +1658,35 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
if (env.force_reg_invariants)
bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
- err = bpf_object__load(obj);
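+ /* Prepare the object separately, then sample memory.peak right around the
+ * load step so the delta approximates the verifier's own memory usage.
+ */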
+ err = bpf_object__prepare(obj);
+ if (!err) {
+ cgroup_err = reset_stat_cgroup();
+ mem_peak_a = cgroup_memory_peak();
+ err = bpf_object__load(obj);
+ mem_peak_b = cgroup_memory_peak();
+ if (!cgroup_err && mem_peak_a >= 0 && mem_peak_b >= 0)
+ mem_peak = mem_peak_b - mem_peak_a;
+ }
env.progs_processed++;
stats->file_name = strdup(base_filename);
stats->prog_name = strdup(bpf_program__name(prog));
stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
+ stats->stats[SIZE] = bpf_program__insn_cnt(prog);
+ stats->stats[PROG_TYPE] = bpf_program__type(prog);
+ stats->stats[ATTACH_TYPE] = bpf_program__expected_attach_type(prog);
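+ /* memory.peak delta is reported in MiB; -1 means the metric was unavailable */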
+ stats->stats[MEMORY_PEAK] = mem_peak < 0 ? -1 : mem_peak / (1024 * 1024);
+
+ memset(&info, 0, info_len);
+ fd = bpf_program__fd(prog);
+ if (fd > 0 && bpf_prog_get_info_by_fd(fd, &info, &info_len) == 0) {
+ stats->stats[JITED_SIZE] = info.jited_prog_len;
+ if (env.dump_mode & DUMP_JITED)
+ dump(info.id, DUMP_JITED, base_filename, prog_name);
+ if (env.dump_mode & DUMP_XLATED)
+ dump(info.id, DUMP_XLATED, base_filename, prog_name);
+ }
+
parse_verif_log(buf, buf_sz, stats);
if (env.verbose) {
@@ -1210,7 +1701,483 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
free(buf);
return 0;
-};
+}
+
+static int append_preset_atom(struct var_preset *preset, char *value, bool is_index)
+{
+ struct field_access *tmp;
+ int i = preset->atom_count;
+ int err;
+
+ tmp = reallocarray(preset->atoms, i + 1, sizeof(*preset->atoms));
+ if (!tmp)
+ return -ENOMEM;
+
+ preset->atoms = tmp;
+ preset->atom_count++;
+
+ if (is_index) {
+ preset->atoms[i].type = ARRAY_INDEX;
+ err = parse_rvalue(value, &preset->atoms[i].index);
+ if (err)
+ return err;
+ } else {
+ preset->atoms[i].type = FIELD_NAME;
+ preset->atoms[i].name = strdup(value);
+ if (!preset->atoms[i].name)
+ return -ENOMEM;
+ }
+ return 0;
+}
+
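+/*
+ * Split a preset variable reference into atoms, e.g. (illustrative)
+ * "cfg.arr[2].flag" becomes: field "cfg", field "arr", index 2, field "flag".
+ */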
+static int parse_var_atoms(const char *full_var, struct var_preset *preset)
+{
+ char expr[256], var[256], *name, *saveptr;
+ int n, len, off, err;
+
+ snprintf(expr, sizeof(expr), "%s", full_var);
+ preset->atom_count = 0;
+ while ((name = strtok_r(preset->atom_count ? NULL : expr, ".", &saveptr))) {
+ len = strlen(name);
+ /* parse variable name */
+ if (sscanf(name, "%[a-zA-Z0-9_] %n", var, &off) != 1) {
+ fprintf(stderr, "Can't parse %s\n", name);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, false);
+ if (err)
+ return err;
+
+ /* parse optional array indexes */
+ while (off < len) {
+ if (sscanf(name + off, " [ %[a-zA-Z0-9_] ] %n", var, &n) != 1) {
+ fprintf(stderr, "Can't parse %s as index\n", name + off);
+ return -EINVAL;
+ }
+ err = append_preset_atom(preset, var, true);
+ if (err)
+ return err;
+ off += n;
+ }
+ }
+ return 0;
+}
+
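+/*
+ * Parse one preset expression of the form "<variable> = <value>", where the
+ * value is an integer or an enumerator name, e.g. (illustrative)
+ * "my_config.features[PROFILE_A] = 1" or "mode = MODE_DEBUG".
+ */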
+static int append_var_preset(struct var_preset **presets, int *cnt, const char *expr)
+{
+ void *tmp;
+ struct var_preset *cur;
+ char var[256], val[256];
+ int n, err;
+
+ tmp = realloc(*presets, (*cnt + 1) * sizeof(**presets));
+ if (!tmp)
+ return -ENOMEM;
+ *presets = tmp;
+ cur = &(*presets)[*cnt];
+ memset(cur, 0, sizeof(*cur));
+ (*cnt)++;
+
+ if (sscanf(expr, " %[][a-zA-Z0-9_. ] = %s %n", var, val, &n) != 2 || n != strlen(expr)) {
+ fprintf(stderr, "Failed to parse expression '%s'\n", expr);
+ return -EINVAL;
+ }
+ /* Remove trailing spaces from var, as scanf may add those */
+ rtrim(var);
+
+ err = parse_rvalue(val, &cur->value);
+ if (err)
+ return err;
+
+ cur->full_name = strdup(var);
+ if (!cur->full_name)
+ return -ENOMEM;
+
+ err = parse_var_atoms(var, cur);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int append_var_preset_file(const char *filename)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(filename, "rt");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open presets in '%s': %s\n", filename, strerror(-err));
+ return -EINVAL;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+
+ err = append_var_preset(&env.presets, &env.npresets, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static bool is_signed_type(const struct btf_type *t)
+{
+ if (btf_is_int(t))
+ return btf_int_encoding(t) & BTF_INT_SIGNED;
+ if (btf_is_any_enum(t))
+ return btf_kflag(t);
+ return true;
+}
+
+static int enum_value_from_name(const struct btf *btf, const struct btf_type *t,
+ const char *evalue, long long *retval)
+{
+ if (btf_is_enum(t)) {
+ struct btf_enum *e = btf_enum(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = e->val;
+ return 0;
+ }
+ }
+ } else if (btf_is_enum64(t)) {
+ struct btf_enum64 *e = btf_enum64(t);
+ int i, n = btf_vlen(t);
+
+ for (i = 0; i < n; ++i, ++e) {
+ const char *cur_name = btf__name_by_offset(btf, e->name_off);
+ __u64 value = btf_enum64_value(e);
+
+ if (strcmp(cur_name, evalue) == 0) {
+ *retval = value;
+ return 0;
+ }
+ }
+ }
+ return -EINVAL;
+}
+
+static bool is_preset_supported(const struct btf_type *t)
+{
+ return btf_is_int(t) || btf_is_enum(t) || btf_is_enum64(t);
+}
+
+static int find_enum_value(const struct btf *btf, const char *name, long long *value)
+{
+ const struct btf_type *t;
+ int cnt, i;
+ long long lvalue;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_any_enum(t))
+ continue;
+
+ if (enum_value_from_name(btf, t, name, &lvalue) == 0) {
+ *value = lvalue;
+ return 0;
+ }
+ }
+ return -ESRCH;
+}
+
+static int resolve_rvalue(struct btf *btf, const struct rvalue *rvalue, long long *result)
+{
+ int err = 0;
+
+ switch (rvalue->type) {
+ case INTEGRAL:
+ *result = rvalue->ivalue;
+ return 0;
+ case ENUMERATOR:
+ err = find_enum_value(btf, rvalue->svalue, result);
+ if (err) {
+ fprintf(stderr, "Can't resolve enum value %s\n", rvalue->svalue);
+ return err;
+ }
+ return 0;
+ default:
+ fprintf(stderr, "Unknown rvalue type\n");
+ return -EOPNOTSUPP;
+ }
+ return 0;
+}
+
+static int adjust_var_secinfo_array(struct btf *btf, int tid, struct field_access *atom,
+ const char *array_name, struct btf_var_secinfo *sinfo)
+{
+ const struct btf_type *t;
+ struct btf_array *barr;
+ long long idx;
+ int err;
+
+ tid = btf__resolve_type(btf, tid);
+ t = btf__type_by_id(btf, tid);
+ if (!btf_is_array(t)) {
+ fprintf(stderr, "Array index is not expected for %s\n",
+ array_name);
+ return -EINVAL;
+ }
+ barr = btf_array(t);
+ err = resolve_rvalue(btf, &atom->index, &idx);
+ if (err)
+ return err;
+ if (idx < 0 || idx >= barr->nelems) {
+ fprintf(stderr, "Array index %lld is out of bounds [0, %u): %s\n",
+ idx, barr->nelems, array_name);
+ return -EINVAL;
+ }
+ sinfo->size = btf__resolve_size(btf, barr->type);
+ sinfo->offset += sinfo->size * idx;
+ sinfo->type = btf__resolve_type(btf, barr->type);
+ return 0;
+}
+
+static int adjust_var_secinfo_member(const struct btf *btf,
+ const struct btf_type *parent_type,
+ __u32 parent_offset,
+ const char *member_name,
+ struct btf_var_secinfo *sinfo)
+{
+ int i;
+
+ if (!btf_is_composite(parent_type)) {
+ fprintf(stderr, "Can't resolve field %s for non-composite type\n", member_name);
+ return -EINVAL;
+ }
+
+ for (i = 0; i < btf_vlen(parent_type); ++i) {
+ const struct btf_member *member;
+ const struct btf_type *member_type;
+ int tid, off;
+
+ member = btf_members(parent_type) + i;
+ tid = btf__resolve_type(btf, member->type);
+ if (tid < 0)
+ return -EINVAL;
+
+ member_type = btf__type_by_id(btf, tid);
+ off = parent_offset + member->offset;
+ if (member->name_off) {
+ const char *name = btf__name_by_offset(btf, member->name_off);
+
+ if (strcmp(member_name, name) == 0) {
+ if (btf_member_bitfield_size(parent_type, i) != 0) {
+ fprintf(stderr, "Bitfield presets are not supported %s\n",
+ name);
+ return -EINVAL;
+ }
+ sinfo->offset += off / 8;
+ sinfo->type = tid;
+ sinfo->size = member_type->size;
+ return 0;
+ }
+ } else if (btf_is_composite(member_type)) {
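+ /* unnamed member: recurse into the anonymous struct/union */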
+ int err;
+
+ err = adjust_var_secinfo_member(btf, member_type, off,
+ member_name, sinfo);
+ if (!err)
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+static int adjust_var_secinfo(struct btf *btf, const struct btf_type *t,
+ struct btf_var_secinfo *sinfo, struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ const char *prev_name;
+ int err, i;
+ int tid;
+
+ assert(preset->atom_count > 0);
+ assert(preset->atoms[0].type == FIELD_NAME);
+
+ tid = btf__resolve_type(btf, t->type);
+ base_type = btf__type_by_id(btf, tid);
+ prev_name = preset->atoms[0].name;
+
+ for (i = 1; i < preset->atom_count; ++i) {
+ struct field_access *atom = preset->atoms + i;
+
+ switch (atom->type) {
+ case ARRAY_INDEX:
+ err = adjust_var_secinfo_array(btf, tid, atom, prev_name, sinfo);
+ break;
+ case FIELD_NAME:
+ err = adjust_var_secinfo_member(btf, base_type, 0, atom->name, sinfo);
+ if (err == -ESRCH)
+ fprintf(stderr, "Can't find '%s'\n", atom->name);
+ prev_name = atom->name;
+ break;
+ default:
+ fprintf(stderr, "Unknown field_access type\n");
+ return -EOPNOTSUPP;
+ }
+ if (err)
+ return err;
+ base_type = btf__type_by_id(btf, sinfo->type);
+ tid = sinfo->type;
+ }
+
+ return 0;
+}
+
+static int set_global_var(struct bpf_object *obj, struct btf *btf,
+ struct bpf_map *map, struct btf_var_secinfo *sinfo,
+ struct var_preset *preset)
+{
+ const struct btf_type *base_type;
+ void *ptr;
+ long long value = preset->value.ivalue;
+ size_t size;
+
+ base_type = btf__type_by_id(btf, btf__resolve_type(btf, sinfo->type));
+ if (!base_type) {
+ fprintf(stderr, "Failed to resolve type %d\n", sinfo->type);
+ return -EINVAL;
+ }
+ if (!is_preset_supported(base_type)) {
+ fprintf(stderr, "Can't set %s. Only ints and enums are supported\n",
+ preset->full_name);
+ return -EINVAL;
+ }
+
+ if (preset->value.type == ENUMERATOR) {
+ if (btf_is_any_enum(base_type)) {
+ if (enum_value_from_name(btf, base_type, preset->value.svalue, &value)) {
+ fprintf(stderr,
+ "Failed to find integer value for enum element %s\n",
+ preset->value.svalue);
+ return -EINVAL;
+ }
+ } else {
+ fprintf(stderr, "Value %s is not supported for type %s\n",
+ preset->value.svalue,
+ btf__name_by_offset(btf, base_type->name_off));
+ return -EINVAL;
+ }
+ }
+
+ /* Check if value fits into the target variable size */
+ if (sinfo->size < sizeof(value)) {
+ bool is_signed = is_signed_type(base_type);
+ __u32 unsigned_bits = sinfo->size * 8 - (is_signed ? 1 : 0);
+ long long max_val = 1ll << unsigned_bits;
+
+ if (value >= max_val || value < -max_val) {
+ fprintf(stderr,
+ "Variable %s value %lld is out of range [%lld; %lld]\n",
+ btf__name_by_offset(btf, base_type->name_off), value,
+ is_signed ? -max_val : 0, max_val - 1);
+ return -EINVAL;
+ }
+ }
+
+ ptr = bpf_map__initial_value(map, &size);
+ if (!ptr || sinfo->offset + sinfo->size > size)
+ return -EINVAL;
+
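+ /* copy the low-order sinfo->size bytes of value; on big-endian hosts those
+ * bytes sit at the end of the 64-bit value
+ */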
+ if (__BYTE_ORDER == __LITTLE_ENDIAN) {
+ memcpy(ptr + sinfo->offset, &value, sinfo->size);
+ } else { /* __BYTE_ORDER == __BIG_ENDIAN */
+ __u8 src_offset = sizeof(value) - sinfo->size;
+
+ memcpy(ptr + sinfo->offset, (void *)&value + src_offset, sinfo->size);
+ }
+ return 0;
+}
+
+static int set_global_vars(struct bpf_object *obj, struct var_preset *presets, int npresets)
+{
+ struct btf_var_secinfo *sinfo;
+ const char *sec_name;
+ const struct btf_type *t;
+ struct bpf_map *map;
+ struct btf *btf;
+ int i, j, k, n, cnt, err = 0;
+
+ if (npresets == 0)
+ return 0;
+
+ btf = bpf_object__btf(obj);
+ if (!btf)
+ return -EINVAL;
+
+ cnt = btf__type_cnt(btf);
+ for (i = 1; i != cnt; ++i) {
+ t = btf__type_by_id(btf, i);
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ sinfo = btf_var_secinfos(t);
+ sec_name = btf__name_by_offset(btf, t->name_off);
+ map = bpf_object__find_map_by_name(obj, sec_name);
+ if (!map)
+ continue;
+
+ n = btf_vlen(t);
+ for (j = 0; j < n; ++j, ++sinfo) {
+ const struct btf_type *var_type = btf__type_by_id(btf, sinfo->type);
+ const char *var_name;
+
+ if (!btf_is_var(var_type))
+ continue;
+
+ var_name = btf__name_by_offset(btf, var_type->name_off);
+
+ for (k = 0; k < npresets; ++k) {
+ struct btf_var_secinfo tmp_sinfo;
+
+ if (strcmp(var_name, presets[k].atoms[0].name) != 0)
+ continue;
+
+ if (presets[k].applied) {
+ fprintf(stderr, "Variable %s is set more than once",
+ var_name);
+ return -EINVAL;
+ }
+ tmp_sinfo = *sinfo;
+ err = adjust_var_secinfo(btf, var_type,
+ &tmp_sinfo, presets + k);
+ if (err)
+ return err;
+
+ err = set_global_var(obj, btf, map, &tmp_sinfo, presets + k);
+ if (err)
+ return err;
+
+ presets[k].applied = true;
+ }
+ }
+ }
+ for (i = 0; i < npresets; ++i) {
+ if (!presets[i].applied) {
+ fprintf(stderr, "Global variable preset %s has not been applied\n",
+ presets[i].full_name);
+ err = -EINVAL;
+ }
+ presets[i].applied = false;
+ }
+ return err;
+}
static int process_obj(const char *filename)
{
@@ -1261,6 +2228,11 @@ static int process_obj(const char *filename)
if (prog_cnt == 1) {
prog = bpf_object__next_program(obj, NULL);
bpf_program__set_autoload(prog, true);
+ err = set_global_vars(obj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
process_prog(filename, obj, prog);
goto cleanup;
}
@@ -1275,6 +2247,12 @@ static int process_obj(const char *filename)
goto cleanup;
}
+ err = set_global_vars(tobj, env.presets, env.npresets);
+ if (err) {
+ fprintf(stderr, "Failed to set global variables %d\n", err);
+ goto cleanup;
+ }
+
lprog = NULL;
bpf_object__for_each_program(tprog, tobj) {
const char *tprog_name = bpf_program__name(tprog);
@@ -1309,12 +2287,18 @@ static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
case PROG_NAME:
cmp = strcmp(s1->prog_name, s2->prog_name);
break;
+ case ATTACH_TYPE:
+ case PROG_TYPE:
+ case SIZE:
+ case JITED_SIZE:
+ case STACK:
case VERDICT:
case DURATION:
case TOTAL_INSNS:
case TOTAL_STATES:
case PEAK_STATES:
case MAX_STATES_PER_INSN:
+ case MEMORY_PEAK:
case MARK_READ_MAX_LEN: {
long v1 = s1->stats[id];
long v2 = s2->stats[id];
@@ -1523,12 +2507,28 @@ static void prepare_value(const struct verif_stats *s, enum stat_id id,
else
*str = s->stats[VERDICT] ? "success" : "failure";
break;
+ case ATTACH_TYPE:
+ if (!s)
+ *str = "N/A";
+ else
+ *str = libbpf_bpf_attach_type_str(s->stats[ATTACH_TYPE]) ?: "N/A";
+ break;
+ case PROG_TYPE:
+ if (!s)
+ *str = "N/A";
+ else
+ *str = libbpf_bpf_prog_type_str(s->stats[PROG_TYPE]) ?: "N/A";
+ break;
case DURATION:
case TOTAL_INSNS:
case TOTAL_STATES:
case PEAK_STATES:
case MAX_STATES_PER_INSN:
case MARK_READ_MAX_LEN:
+ case STACK:
+ case SIZE:
+ case JITED_SIZE:
+ case MEMORY_PEAK:
*val = s ? s->stats[id] : 0;
break;
default:
@@ -1612,7 +2612,11 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
case TOTAL_STATES:
case PEAK_STATES:
case MAX_STATES_PER_INSN:
- case MARK_READ_MAX_LEN: {
+ case MARK_READ_MAX_LEN:
+ case SIZE:
+ case JITED_SIZE:
+ case MEMORY_PEAK:
+ case STACK: {
long val;
int err, n;
@@ -1625,6 +2629,42 @@ static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats
st->stats[id] = val;
break;
}
+ case PROG_TYPE: {
+ enum bpf_prog_type prog_type = 0;
+ const char *type;
+
+ while ((type = libbpf_bpf_prog_type_str(prog_type))) {
+ if (strcmp(type, str) == 0) {
+ st->stats[id] = prog_type;
+ break;
+ }
+ prog_type++;
+ }
+
+ if (!type) {
+ fprintf(stderr, "Unrecognized prog type %s\n", str);
+ return -EINVAL;
+ }
+ break;
+ }
+ case ATTACH_TYPE: {
+ enum bpf_attach_type attach_type = 0;
+ const char *type;
+
+ while ((type = libbpf_bpf_attach_type_str(attach_type))) {
+ if (strcmp(type, str) == 0) {
+ st->stats[id] = attach_type;
+ break;
+ }
+ attach_type++;
+ }
+
+ if (!type) {
+ fprintf(stderr, "Unrecognized attach type %s\n", str);
+ return -EINVAL;
+ }
+ break;
+ }
default:
fprintf(stderr, "Unrecognized stat #%d\n", id);
return -EINVAL;
@@ -2213,7 +3253,7 @@ static void output_prog_stats(void)
static int handle_verif_mode(void)
{
- int i, err;
+ int i, err = 0;
if (env.filename_cnt == 0) {
fprintf(stderr, "Please provide path to BPF object file!\n\n");
@@ -2221,11 +3261,12 @@ static int handle_verif_mode(void)
return -EINVAL;
}
+ create_stat_cgroup();
for (i = 0; i < env.filename_cnt; i++) {
err = process_obj(env.filenames[i]);
if (err) {
fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
- return err;
+ goto out;
}
}
@@ -2233,7 +3274,9 @@ static int handle_verif_mode(void)
output_prog_stats();
- return 0;
+out:
+ destroy_stat_cgroup();
+ return err;
}
static int handle_replay_mode(void)
@@ -2263,7 +3306,7 @@ static int handle_replay_mode(void)
int main(int argc, char **argv)
{
- int err = 0, i;
+ int err = 0, i, j;
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
return 1;
@@ -2321,5 +3364,21 @@ int main(int argc, char **argv)
free(env.deny_filters[i].prog_glob);
}
free(env.deny_filters);
+ for (i = 0; i < env.npresets; ++i) {
+ free(env.presets[i].full_name);
+ for (j = 0; j < env.presets[i].atom_count; ++j) {
+ switch (env.presets[i].atoms[j].type) {
+ case FIELD_NAME:
+ free(env.presets[i].atoms[j].name);
+ break;
+ case ARRAY_INDEX:
+ if (env.presets[i].atoms[j].index.type == ENUMERATOR)
+ free(env.presets[i].atoms[j].index.svalue);
+ break;
+ }
+ }
+ free(env.presets[i].atoms);
+ }
+ free(env.presets);
return -err;
}
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 79505d294c44..2f869daf8a06 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -43,6 +43,15 @@ riscv64)
BZIMAGE="arch/riscv/boot/Image"
ARCH="riscv"
;;
+ppc64el)
+ QEMU_BINARY=qemu-system-ppc64
+ QEMU_CONSOLE="hvc0"
+ # KVM has not been tested on powerpc, therefore it is not enabled for now.
+ HOST_FLAGS=(-machine pseries -cpu POWER9)
+ CROSS_FLAGS=(-machine pseries -cpu POWER9)
+ BZIMAGE="vmlinux"
+ ARCH="powerpc"
+ ;;
*)
echo "Unsupported architecture"
exit 1
diff --git a/tools/testing/selftests/bpf/with_addr.sh b/tools/testing/selftests/bpf/with_addr.sh
deleted file mode 100755
index ffcd3953f94c..000000000000
--- a/tools/testing/selftests/bpf/with_addr.sh
+++ /dev/null
@@ -1,54 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# add private ipv4 and ipv6 addresses to loopback
-
-readonly V6_INNER='100::a/128'
-readonly V4_INNER='192.168.0.1/32'
-
-if getopts ":s" opt; then
- readonly SIT_DEV_NAME='sixtofourtest0'
- readonly V6_SIT='2::/64'
- readonly V4_SIT='172.17.0.1/32'
- shift
-fi
-
-fail() {
- echo "error: $*" 1>&2
- exit 1
-}
-
-setup() {
- ip -6 addr add "${V6_INNER}" dev lo || fail 'failed to setup v6 address'
- ip -4 addr add "${V4_INNER}" dev lo || fail 'failed to setup v4 address'
-
- if [[ -n "${V6_SIT}" ]]; then
- ip link add "${SIT_DEV_NAME}" type sit remote any local any \
- || fail 'failed to add sit'
- ip link set dev "${SIT_DEV_NAME}" up \
- || fail 'failed to bring sit device up'
- ip -6 addr add "${V6_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v6 SIT address'
- ip -4 addr add "${V4_SIT}" dev "${SIT_DEV_NAME}" \
- || fail 'failed to setup v4 SIT address'
- fi
-
- sleep 2 # avoid race causing bind to fail
-}
-
-cleanup() {
- if [[ -n "${V6_SIT}" ]]; then
- ip -4 addr del "${V4_SIT}" dev "${SIT_DEV_NAME}"
- ip -6 addr del "${V6_SIT}" dev "${SIT_DEV_NAME}"
- ip link del "${SIT_DEV_NAME}"
- fi
-
- ip -4 addr del "${V4_INNER}" dev lo
- ip -6 addr del "${V6_INNER}" dev lo
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/with_tunnels.sh b/tools/testing/selftests/bpf/with_tunnels.sh
deleted file mode 100755
index e24949ed3a20..000000000000
--- a/tools/testing/selftests/bpf/with_tunnels.sh
+++ /dev/null
@@ -1,36 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# setup tunnels for flow dissection test
-
-readonly SUFFIX="test_$(mktemp -u XXXX)"
-CONFIG="remote 127.0.0.2 local 127.0.0.1 dev lo"
-
-setup() {
- ip link add "ipip_${SUFFIX}" type ipip ${CONFIG}
- ip link add "gre_${SUFFIX}" type gre ${CONFIG}
- ip link add "sit_${SUFFIX}" type sit ${CONFIG}
-
- echo "tunnels before test:"
- ip tunnel show
-
- ip link set "ipip_${SUFFIX}" up
- ip link set "gre_${SUFFIX}" up
- ip link set "sit_${SUFFIX}" up
-}
-
-
-cleanup() {
- ip tunnel del "ipip_${SUFFIX}"
- ip tunnel del "gre_${SUFFIX}"
- ip tunnel del "sit_${SUFFIX}"
-
- echo "tunnels after test:"
- ip tunnel show
-}
-
-trap cleanup EXIT
-
-setup
-"$@"
-exit "$?"
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f9956eed797..3d8de0d4c96a 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -13,6 +13,7 @@
* - UDP 9091 packets trigger TX reply
* - TX HW timestamp is requested and reported back upon completion
* - TX checksum is requested
+ * - TX launch time HW offload is requested for transmission
*/
#include <test_progs.h>
@@ -27,7 +28,7 @@
#include <linux/errqueue.h>
#include <linux/if_link.h>
#include <linux/net_tstamp.h>
-#include <linux/udp.h>
+#include <netinet/udp.h>
#include <linux/sockios.h>
#include <linux/if_xdp.h>
#include <sys/mman.h>
@@ -37,6 +38,15 @@
#include <time.h>
#include <unistd.h>
#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/pkt_sched.h>
+#include <linux/pkt_cls.h>
+#include <linux/ethtool.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
#include "xdp_metadata.h"
@@ -64,6 +74,18 @@ int rxq;
bool skip_tx;
__u64 last_hw_rx_timestamp;
__u64 last_xdp_rx_timestamp;
+__u64 last_launch_time;
+__u64 launch_time_delta_to_hw_rx_timestamp;
+int launch_time_queue;
+
+#define run_command(cmd, ...) \
+({ \
+ char command[1024]; \
+ memset(command, 0, sizeof(command)); \
+ snprintf(command, sizeof(command), cmd, ##__VA_ARGS__); \
+ fprintf(stderr, "Running: %s\n", command); \
+ system(command); \
+})
void test__fail(void) { /* for network_helpers.c */ }
@@ -79,7 +101,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
- .flags = XSK_UMEM__DEFAULT_FLAGS,
+ .flags = XDP_UMEM_TX_METADATA_LEN,
.tx_metadata_len = sizeof(struct xsk_tx_metadata),
};
__u32 idx = 0;
@@ -298,6 +320,12 @@ static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
if (meta->completion.tx_timestamp) {
__u64 ref_tstamp = gettime(clock_id);
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ print_tstamp_delta("HW Launch-time",
+ "HW TX-complete-time",
+ last_launch_time,
+ meta->completion.tx_timestamp);
+ }
print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
meta->completion.tx_timestamp, ref_tstamp);
print_tstamp_delta("XDP RX-time", "User TX-complete-time",
@@ -395,6 +423,17 @@ static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
xsk, ntohs(udph->check), ntohs(want_csum),
meta->request.csum_start, meta->request.csum_offset);
+ /* Set the value of launch time */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ meta->flags |= XDP_TXMD_FLAGS_LAUNCH_TIME;
+ meta->request.launch_time = last_hw_rx_timestamp +
+ launch_time_delta_to_hw_rx_timestamp;
+ last_launch_time = meta->request.launch_time;
+ print_tstamp_delta("HW RX-time", "HW Launch-time",
+ last_hw_rx_timestamp,
+ meta->request.launch_time);
+ }
+
memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
tx_desc->options |= XDP_TX_METADATA;
tx_desc->len = len;
@@ -407,6 +446,7 @@ static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t
const struct xdp_desc *rx_desc;
struct pollfd fds[rxq + 1];
__u64 comp_addr;
+ __u64 deadline;
__u64 addr;
__u32 idx = 0;
int ret;
@@ -477,9 +517,15 @@ peek:
if (ret)
printf("kick_tx ret=%d\n", ret);
- for (int j = 0; j < 500; j++) {
+ /* wait 1 second + cover launch time */
+ deadline = gettime(clock_id) +
+ NANOSEC_PER_SEC +
+ launch_time_delta_to_hw_rx_timestamp;
+ while (true) {
if (complete_tx(xsk, clock_id))
break;
+ if (gettime(clock_id) >= deadline)
+ break;
usleep(10);
}
}
@@ -551,6 +597,7 @@ static void hwtstamp_enable(const char *ifname)
{
struct hwtstamp_config cfg = {
.rx_filter = HWTSTAMP_FILTER_ALL,
+ .tx_type = HWTSTAMP_TX_ON,
};
hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
@@ -607,6 +654,10 @@ static void print_usage(void)
" -h Display this help and exit\n\n"
" -m Enable multi-buffer XDP for larger MTU\n"
" -r Don't generate AF_XDP reply (rx metadata only)\n"
+ " -l Delta of launch time relative to HW RX-time in ns\n"
+ " default: 0 ns (launch time request is disabled)\n"
+ " -L Tx Queue to be enabled with launch time offload\n"
+ " default: 0 (Tx Queue 0)\n"
"Generate test packets on the other machine with:\n"
" echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
@@ -617,7 +668,7 @@ static void read_args(int argc, char *argv[])
{
int opt;
- while ((opt = getopt(argc, argv, "chmr")) != -1) {
+ while ((opt = getopt(argc, argv, "chmrl:L:")) != -1) {
switch (opt) {
case 'c':
bind_flags &= ~XDP_USE_NEED_WAKEUP;
@@ -633,6 +684,12 @@ static void read_args(int argc, char *argv[])
case 'r':
skip_tx = true;
break;
+ case 'l':
+ launch_time_delta_to_hw_rx_timestamp = atoll(optarg);
+ break;
+ case 'L':
+ launch_time_queue = atoll(optarg);
+ break;
case '?':
if (isprint(optopt))
fprintf(stderr, "Unknown option: -%c\n", optopt);
@@ -656,23 +713,118 @@ static void read_args(int argc, char *argv[])
error(-1, errno, "Invalid interface name");
}
+void clean_existing_configurations(void)
+{
+ /* Check and delete root qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc mqprio 8001:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s root", ifname);
+
+ /* Check and delete ingress qdisc if exists */
+ if (run_command("sudo tc qdisc show dev %s | grep -q 'qdisc ingress ffff:'", ifname) == 0)
+ run_command("sudo tc qdisc del dev %s ingress", ifname);
+
+ /* Check and delete ethtool filters if any exist */
+ if (run_command("sudo ethtool -n %s | grep -q 'Filter:'", ifname) == 0) {
+ run_command("sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 sudo ethtool -N %s delete >&2",
+ ifname, ifname);
+ }
+}
+
+#define MAX_TC 16
+
int main(int argc, char *argv[])
{
clockid_t clock_id = CLOCK_TAI;
+ struct bpf_program *prog;
int server_fd = -1;
+ size_t map_len = 0;
+ size_t que_len = 0;
+ char *buf = NULL;
+ char *map = NULL;
+ char *que = NULL;
+ char *tmp = NULL;
+ int tc = 0;
int ret;
int i;
- struct bpf_program *prog;
-
read_args(argc, argv);
rxq = rxq_num(ifname);
-
printf("rxq: %d\n", rxq);
+ if (launch_time_queue >= rxq || launch_time_queue < 0)
+ error(1, 0, "Invalid launch_time_queue.");
+
+ clean_existing_configurations();
+ sleep(1);
+
+ /* Enable tx and rx hardware timestamping */
hwtstamp_enable(ifname);
+ /* Prepare priority to traffic class map for tc-mqprio */
+ for (i = 0; i < MAX_TC; i++) {
+ if (i < rxq)
+ tc = i;
+
+ if (asprintf(&buf, "%d ", tc) == -1) {
+ printf("Failed to malloc buf for tc map.\n");
+ goto free_mem;
+ }
+
+ map_len += strlen(buf);
+ tmp = realloc(map, map_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc map.\n");
+ goto free_mem;
+ }
+ map = tmp;
+ if (map_len == strlen(buf))
+ map[0] = '\0'; /* first pass: realloc() leaves the new buffer uninitialized */
+ strcat(map, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Prepare traffic class to hardware queue map for tc-mqprio */
+ for (i = 0; i <= tc; i++) {
+ if (asprintf(&buf, "1@%d ", i) == -1) {
+ printf("Failed to malloc buf for tc queues.\n");
+ goto free_mem;
+ }
+
+ que_len += strlen(buf);
+ tmp = realloc(que, que_len + 1);
+ if (!tmp) {
+ printf("Failed to realloc tc queues.\n");
+ goto free_mem;
+ }
+ que = tmp;
+ if (que_len == strlen(buf))
+ que[0] = '\0'; /* first pass: realloc() leaves the new buffer uninitialized */
+ strcat(que, buf);
+ free(buf);
+ buf = NULL;
+ }
+
+ /* Add mqprio qdisc */
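+ /* e.g. with 4 RX queues this expands to (illustrative): "tc qdisc add dev
+ * <ifname> handle 8001: parent root mqprio num_tc 4
+ * map 0 1 2 3 3 3 3 3 3 3 3 3 3 3 3 3 queues 1@0 1@1 1@2 1@3 hw 0"
+ */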
+ run_command("sudo tc qdisc add dev %s handle 8001: parent root mqprio num_tc %d map %squeues %shw 0",
+ ifname, tc + 1, map, que);
+
+ /* To test launch time, send UDP packet with VLAN priority 1 to port 9091 */
+ if (launch_time_delta_to_hw_rx_timestamp) {
+ /* Enable launch time hardware offload on launch_time_queue */
+ run_command("sudo tc qdisc replace dev %s parent 8001:%d etf offload clockid CLOCK_TAI delta 500000",
+ ifname, launch_time_queue + 1);
+ sleep(1);
+
+ /* Route incoming packet with VLAN priority 1 into launch_time_queue */
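+ /* TCI value 0x2000 selects PCP 1; mask 0x1FFF ignores DEI and VID. If the
+ * NIC lacks ethtool flow steering, fall back to a tc flower filter instead.
+ */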
+ if (run_command("sudo ethtool -N %s flow-type ether vlan 0x2000 vlan-mask 0x1FFF action %d",
+ ifname, launch_time_queue)) {
+ run_command("sudo tc qdisc add dev %s ingress", ifname);
+ run_command("sudo tc filter add dev %s parent ffff: protocol 802.1Q flower vlan_prio 1 hw_tc %d",
+ ifname, launch_time_queue);
+ }
+
+ /* Enable VLAN tag stripping offload */
+ run_command("sudo ethtool -K %s rxvlan on", ifname);
+ }
+
rx_xsk = malloc(sizeof(struct xsk) * rxq);
if (!rx_xsk)
error(1, ENOMEM, "malloc");
@@ -732,4 +884,11 @@ int main(int argc, char *argv[])
cleanup();
if (ret)
error(1, -ret, "verify_metadata");
+
+ clean_existing_configurations();
+
+free_mem:
+ free(buf);
+ free(map);
+ free(que);
}
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
deleted file mode 100644
index c1fc44c87c30..000000000000
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ /dev/null
@@ -1,226 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/if_link.h>
-#include <assert.h>
-#include <errno.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <net/if.h>
-#include <unistd.h>
-#include <libgen.h>
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <netinet/in.h>
-
-#include "bpf_util.h"
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#define MAX_IFACE_NUM 32
-#define MAX_INDEX_NUM 1024
-
-static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int ifaces[MAX_IFACE_NUM] = {};
-
-static void int_exit(int sig)
-{
- __u32 prog_id = 0;
- int i;
-
- for (i = 0; ifaces[i] > 0; i++) {
- if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
- printf("bpf_xdp_query_id failed\n");
- exit(1);
- }
- if (prog_id)
- bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
- }
-
- exit(0);
-}
-
-static int get_mac_addr(unsigned int ifindex, void *mac_addr)
-{
- char ifname[IF_NAMESIZE];
- struct ifreq ifr;
- int fd, ret = -1;
-
- fd = socket(AF_INET, SOCK_DGRAM, 0);
- if (fd < 0)
- return ret;
-
- if (!if_indextoname(ifindex, ifname))
- goto err_out;
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
- goto err_out;
-
- memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
- ret = 0;
-
-err_out:
- close(fd);
- return ret;
-}
-
-static void usage(const char *prog)
-{
- fprintf(stderr,
- "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
- "OPTS:\n"
- " -S use skb-mode\n"
- " -N enforce native mode\n"
- " -F force loading prog\n"
- " -X load xdp program on egress\n",
- prog);
-}
-
-int main(int argc, char **argv)
-{
- int prog_fd, group_all, mac_map;
- struct bpf_program *ingress_prog, *egress_prog;
- int i, err, ret, opt, egress_prog_fd = 0;
- struct bpf_devmap_val devmap_val;
- bool attach_egress_prog = false;
- unsigned char mac_addr[6];
- char ifname[IF_NAMESIZE];
- struct bpf_object *obj;
- unsigned int ifindex;
- char filename[256];
-
- while ((opt = getopt(argc, argv, "SNFX")) != -1) {
- switch (opt) {
- case 'S':
- xdp_flags |= XDP_FLAGS_SKB_MODE;
- break;
- case 'N':
- /* default, set below */
- break;
- case 'F':
- xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
- break;
- case 'X':
- attach_egress_prog = true;
- break;
- default:
- usage(basename(argv[0]));
- return 1;
- }
- }
-
- if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
- xdp_flags |= XDP_FLAGS_DRV_MODE;
- } else if (attach_egress_prog) {
- printf("Load xdp program on egress with SKB mode not supported yet\n");
- goto err_out;
- }
-
- if (optind == argc) {
- printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
- goto err_out;
- }
-
- printf("Get interfaces:");
- for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
- ifaces[i] = if_nametoindex(argv[optind + i]);
- if (!ifaces[i])
- ifaces[i] = strtoul(argv[optind + i], NULL, 0);
- if (!if_indextoname(ifaces[i], ifname)) {
- perror("Invalid interface name or i");
- goto err_out;
- }
- if (ifaces[i] > MAX_INDEX_NUM) {
- printf(" interface index too large\n");
- goto err_out;
- }
- printf(" %d", ifaces[i]);
- }
- printf("\n");
-
- snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
- obj = bpf_object__open_file(filename, NULL);
- err = libbpf_get_error(obj);
- if (err)
- goto err_out;
- err = bpf_object__load(obj);
- if (err)
- goto err_out;
- prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
-
- if (attach_egress_prog)
- group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
- else
- group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
- mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
-
- if (group_all < 0 || mac_map < 0) {
- printf("bpf_object__find_map_fd_by_name failed\n");
- goto err_out;
- }
-
- if (attach_egress_prog) {
- /* Find ingress/egress prog for 2nd xdp prog */
- ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
- egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
- if (!ingress_prog || !egress_prog) {
- printf("finding ingress/egress_prog in obj file failed\n");
- goto err_out;
- }
- prog_fd = bpf_program__fd(ingress_prog);
- egress_prog_fd = bpf_program__fd(egress_prog);
- if (prog_fd < 0 || egress_prog_fd < 0) {
- printf("find egress_prog fd failed\n");
- goto err_out;
- }
- }
-
- signal(SIGINT, int_exit);
- signal(SIGTERM, int_exit);
-
- /* Init forward multicast groups and exclude group */
- for (i = 0; ifaces[i] > 0; i++) {
- ifindex = ifaces[i];
-
- if (attach_egress_prog) {
- ret = get_mac_addr(ifindex, mac_addr);
- if (ret < 0) {
- printf("get interface %d mac failed\n", ifindex);
- goto err_out;
- }
- ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
- if (ret) {
- perror("bpf_update_elem mac_map failed\n");
- goto err_out;
- }
- }
-
- /* Add all the interfaces to group all */
- devmap_val.ifindex = ifindex;
- devmap_val.bpf_prog.fd = egress_prog_fd;
- ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
- if (ret) {
- perror("bpf_map_update_elem");
- goto err_out;
- }
-
- /* bind prog_fd to each interface */
- ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
- if (ret) {
- printf("Set xdp fd failed on %d\n", ifindex);
- goto err_out;
- }
- }
-
- /* sleep some time for testing */
- sleep(999);
-
- return 0;
-
-err_out:
- return 1;
-}
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 1503a1d2faa0..9ed8c796645d 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -155,7 +155,7 @@ int main(int argc, char **argv)
}
if (!server) {
- /* Only supports IPv4; see hints initiailization above. */
+ /* Only supports IPv4; see hints initialization above. */
if (getaddrinfo(argv[optind], NULL, &hints, &a) || !a) {
fprintf(stderr, "Could not resolve %s\n", argv[optind]);
return 1;
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
index 93c2cc413cfc..48729da142c2 100644
--- a/tools/testing/selftests/bpf/xsk.h
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -93,8 +93,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
/* Refresh the local tail pointer.
* cached_cons is r->size bigger than the real consumer pointer so
* that this addition can be avoided in the more frequently
- * executed code that computs free_entries in the beginning of
- * this function. Without this optimization it whould have been
+ * executed code that computes free_entries in the beginning of
+ * this function. Without this optimization it would have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
index 5a6f36f07383..45810ff552da 100644
--- a/tools/testing/selftests/bpf/xsk_xdp_common.h
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -4,6 +4,7 @@
#define XSK_XDP_COMMON_H_
#define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
struct xdp_info {
__u64 count;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11f047b8af75..05b3cebc5ca9 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -74,42 +74,33 @@
#define _GNU_SOURCE
#include <assert.h>
#include <fcntl.h>
-#include <errno.h>
#include <getopt.h>
#include <linux/if_link.h>
#include <linux/if_ether.h>
#include <linux/mman.h>
#include <linux/netdev.h>
-#include <linux/bitmap.h>
#include <linux/ethtool.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
-#include <poll.h>
-#include <pthread.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <libgen.h>
-#include <string.h>
#include <stddef.h>
#include <sys/mman.h>
-#include <sys/socket.h>
-#include <sys/time.h>
#include <sys/types.h>
-#include <unistd.h>
+#include "prog_tests/test_xsk.h"
#include "xsk_xdp_progs.skel.h"
#include "xsk.h"
#include "xskxceiver.h"
#include <bpf/bpf.h>
#include <linux/filter.h>
-#include "../kselftest.h"
+#include "kselftest.h"
#include "xsk_xdp_common.h"
#include <network_helpers.h>
-static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
static u32 opt_run_test = RUN_ALL_TESTS;
@@ -118,169 +109,12 @@ void test__fail(void) { /* for network_helpers.c */ }
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
- strerror(error));
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line,
+ error, strerror(error));
ksft_exit_xfail();
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
-static char *mode_string(struct test_spec *test)
-{
- switch (test->mode) {
- case TEST_MODE_SKB:
- return "SKB";
- case TEST_MODE_DRV:
- return "DRV";
- case TEST_MODE_ZC:
- return "ZC";
- default:
- return "BOGUS";
- }
-}
-
-static void report_failure(struct test_spec *test)
-{
- if (test->fail)
- return;
-
- ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
- test->name);
- test->fail = true;
-}
-
-/* The payload is a word consisting of a packet sequence number in the upper
- * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
- * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
- */
-static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
-{
- u32 *ptr = (u32 *)dest, i;
-
- start /= sizeof(*ptr);
- size /= sizeof(*ptr);
- for (i = 0; i < size; i++)
- ptr[i] = htonl(pkt_nb << 16 | (i + start));
-}
-
-static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
-{
- memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
- memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
- eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
-}
-
-static bool is_umem_valid(struct ifobject *ifobj)
-{
- return !!ifobj->umem->umem;
-}
-
-static u32 mode_to_xdp_flags(enum test_mode mode)
-{
- return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
-}
-
-static u64 umem_size(struct xsk_umem_info *umem)
-{
- return umem->num_frames * umem->frame_size;
-}
-
-static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
- u64 size)
-{
- struct xsk_umem_config cfg = {
- .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
- .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
- .frame_size = umem->frame_size,
- .frame_headroom = umem->frame_headroom,
- .flags = XSK_UMEM__DEFAULT_FLAGS
- };
- int ret;
-
- if (umem->fill_size)
- cfg.fill_size = umem->fill_size;
-
- if (umem->comp_size)
- cfg.comp_size = umem->comp_size;
-
- if (umem->unaligned_mode)
- cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
-
- ret = xsk_umem__create(&umem->umem, buffer, size,
- &umem->fq, &umem->cq, &cfg);
- if (ret)
- return ret;
-
- umem->buffer = buffer;
- if (ifobj->shared_umem && ifobj->rx_on) {
- umem->base_addr = umem_size(umem);
- umem->next_buffer = umem_size(umem);
- }
-
- return 0;
-}
-
-static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
-{
- u64 addr;
-
- addr = umem->next_buffer;
- umem->next_buffer += umem->frame_size;
- if (umem->next_buffer >= umem->base_addr + umem_size(umem))
- umem->next_buffer = umem->base_addr;
-
- return addr;
-}
-
-static void umem_reset_alloc(struct xsk_umem_info *umem)
-{
- umem->next_buffer = 0;
-}
-
-static void enable_busy_poll(struct xsk_socket_info *xsk)
-{
- int sock_opt;
-
- sock_opt = 1;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-
- sock_opt = 20;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-
- sock_opt = xsk->batch_size;
- if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
- (void *)&sock_opt, sizeof(sock_opt)) < 0)
- exit_with_error(errno);
-}
-
-static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
- struct ifobject *ifobject, bool shared)
-{
- struct xsk_socket_config cfg = {};
- struct xsk_ring_cons *rxr;
- struct xsk_ring_prod *txr;
-
- xsk->umem = umem;
- cfg.rx_size = xsk->rxqsize;
- cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- cfg.bind_flags = ifobject->bind_flags;
- if (shared)
- cfg.bind_flags |= XDP_SHARED_UMEM;
- if (ifobject->mtu > MAX_ETH_PKT_SIZE)
- cfg.bind_flags |= XDP_USE_SG;
- if (umem->comp_size)
- cfg.tx_size = umem->comp_size;
- if (umem->fill_size)
- cfg.rx_size = umem->fill_size;
-
- txr = ifobject->tx_on ? &xsk->tx : NULL;
- rxr = ifobject->rx_on ? &xsk->rx : NULL;
- return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
-}
static bool ifobj_zc_avail(struct ifobject *ifobject)
{
@@ -312,7 +146,7 @@ static bool ifobj_zc_avail(struct ifobject *ifobject)
ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
ifobject->rx_on = true;
xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- ret = __xsk_configure_socket(xsk, umem, ifobject, false);
+ ret = xsk_configure_socket(xsk, umem, ifobject, false);
if (!ret)
zc_avail = true;
@@ -325,25 +159,6 @@ out:
return zc_avail;
}
-#define MAX_SKB_FRAGS_PATH "/proc/sys/net/core/max_skb_frags"
-static unsigned int get_max_skb_frags(void)
-{
- unsigned int max_skb_frags = 0;
- FILE *file;
-
- file = fopen(MAX_SKB_FRAGS_PATH, "r");
- if (!file) {
- ksft_print_msg("Error opening %s\n", MAX_SKB_FRAGS_PATH);
- return 0;
- }
-
- if (fscanf(file, "%u", &max_skb_frags) != 1)
- ksft_print_msg("Error reading %s\n", MAX_SKB_FRAGS_PATH);
-
- fclose(file);
- return max_skb_frags;
-}
-
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"busy-poll", no_argument, 0, 'b'},
@@ -444,2119 +259,36 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
}
}
-static int set_ring_size(struct ifobject *ifobj)
-{
- int ret;
- u32 ctr = 0;
-
- while (ctr++ < SOCK_RECONF_CTR) {
- ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
- if (!ret)
- break;
-
- /* Retry if it fails */
- if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
- return -errno;
-
- usleep(USLEEP_MAX);
- }
-
- return ret;
-}
-
-static int hw_ring_size_reset(struct ifobject *ifobj)
-{
- ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
- ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
- return set_ring_size(ifobj);
-}
-
-static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx)
-{
- u32 i, j;
-
- for (i = 0; i < MAX_INTERFACES; i++) {
- struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
- ifobj->xsk = &ifobj->xsk_arr[0];
- ifobj->use_poll = false;
- ifobj->use_fill_ring = true;
- ifobj->release_rx = true;
- ifobj->validation_func = NULL;
- ifobj->use_metadata = false;
-
- if (i == 0) {
- ifobj->rx_on = false;
- ifobj->tx_on = true;
- } else {
- ifobj->rx_on = true;
- ifobj->tx_on = false;
- }
-
- memset(ifobj->umem, 0, sizeof(*ifobj->umem));
- ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
- ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
-
- for (j = 0; j < MAX_SOCKETS; j++) {
- memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
- ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
- ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
- if (i == 0)
- ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
- else
- ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
-
- memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
- memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
- ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
- ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
- }
- }
-
- if (ifobj_tx->hw_ring_size_supp)
- hw_ring_size_reset(ifobj_tx);
-
- test->ifobj_tx = ifobj_tx;
- test->ifobj_rx = ifobj_rx;
- test->current_step = 0;
- test->total_steps = 1;
- test->nb_sockets = 1;
- test->fail = false;
- test->set_ring = false;
- test->mtu = MAX_ETH_PKT_SIZE;
- test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
- test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
- test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
- test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
-}
-
-static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
- struct ifobject *ifobj_rx, enum test_mode mode,
- const struct test_spec *test_to_run)
-{
- struct pkt_stream *tx_pkt_stream;
- struct pkt_stream *rx_pkt_stream;
- u32 i;
-
- tx_pkt_stream = test->tx_pkt_stream_default;
- rx_pkt_stream = test->rx_pkt_stream_default;
- memset(test, 0, sizeof(*test));
- test->tx_pkt_stream_default = tx_pkt_stream;
- test->rx_pkt_stream_default = rx_pkt_stream;
-
- for (i = 0; i < MAX_INTERFACES; i++) {
- struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
-
- ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
- if (mode == TEST_MODE_ZC)
- ifobj->bind_flags |= XDP_ZEROCOPY;
- else
- ifobj->bind_flags |= XDP_COPY;
- }
-
- strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
- test->test_func = test_to_run->test_func;
- test->mode = mode;
- __test_spec_init(test, ifobj_tx, ifobj_rx);
-}
-
-static void test_spec_reset(struct test_spec *test)
-{
- __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
-}
-
-static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
- struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
- struct bpf_map *xskmap_tx)
-{
- test->xdp_prog_rx = xdp_prog_rx;
- test->xdp_prog_tx = xdp_prog_tx;
- test->xskmap_rx = xskmap_rx;
- test->xskmap_tx = xskmap_tx;
-}
-
-static int test_spec_set_mtu(struct test_spec *test, int mtu)
-{
- int err;
-
- if (test->ifobj_rx->mtu != mtu) {
- err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
- if (err)
- return err;
- test->ifobj_rx->mtu = mtu;
- }
- if (test->ifobj_tx->mtu != mtu) {
- err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
- if (err)
- return err;
- test->ifobj_tx->mtu = mtu;
- }
-
- return 0;
-}
-
-static void pkt_stream_reset(struct pkt_stream *pkt_stream)
-{
- if (pkt_stream) {
- pkt_stream->current_pkt_nb = 0;
- pkt_stream->nb_rx_pkts = 0;
- }
-}
-
-static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
-{
- if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
- return NULL;
-
- return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
-}
-
-static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
-{
- while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
- (*pkts_sent)++;
- if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
- return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
- pkt_stream->current_pkt_nb++;
- }
- return NULL;
-}
-
-static void pkt_stream_delete(struct pkt_stream *pkt_stream)
-{
- free(pkt_stream->pkts);
- free(pkt_stream);
-}
-
-static void pkt_stream_restore_default(struct test_spec *test)
-{
- struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
- struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
-
- if (tx_pkt_stream != test->tx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
- test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
- }
-
- if (rx_pkt_stream != test->rx_pkt_stream_default) {
- pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
- test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
- }
-}
-
-static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = calloc(1, sizeof(*pkt_stream));
- if (!pkt_stream)
- return NULL;
-
- pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
- if (!pkt_stream->pkts) {
- free(pkt_stream);
- return NULL;
- }
-
- pkt_stream->nb_pkts = nb_pkts;
- return pkt_stream;
-}
-
-static bool pkt_continues(u32 options)
-{
- return options & XDP_PKT_CONTD;
-}
-
-static u32 ceil_u32(u32 a, u32 b)
-{
- return (a + b - 1) / b;
-}
-
-static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
-{
- u32 nb_frags = 1, next_frag;
-
- if (!pkt)
- return 1;
-
- if (!pkt_stream->verbatim) {
- if (!pkt->valid || !pkt->len)
- return 1;
- return ceil_u32(pkt->len, frame_size);
- }
-
- /* Search for the end of the packet in verbatim mode */
- if (!pkt_continues(pkt->options))
- return nb_frags;
-
- next_frag = pkt_stream->current_pkt_nb;
- pkt++;
- while (next_frag++ < pkt_stream->nb_pkts) {
- nb_frags++;
- if (!pkt_continues(pkt->options) || !pkt->valid)
- break;
- pkt++;
- }
- return nb_frags;
-}
-
-static bool set_pkt_valid(int offset, u32 len)
-{
- return len <= MAX_ETH_JUMBO_SIZE;
-}
-
-static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
- pkt->offset = offset;
- pkt->len = len;
- pkt->valid = set_pkt_valid(offset, len);
-}
-
-static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
-{
- bool prev_pkt_valid = pkt->valid;
-
- pkt_set(pkt_stream, pkt, offset, len);
- pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
-}
-
-static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
-{
- return ceil_u32(len, umem->frame_size) * umem->frame_size;
-}
-
-static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- pkt_stream = __pkt_stream_alloc(nb_pkts);
- if (!pkt_stream)
- exit_with_error(ENOMEM);
-
- pkt_stream->nb_pkts = nb_pkts;
- pkt_stream->max_pkt_len = pkt_len;
- for (i = 0; i < nb_pkts; i++) {
- struct pkt *pkt = &pkt_stream->pkts[i];
-
- pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
- pkt->pkt_nb = nb_start + i * nb_off;
- }
-
- return pkt_stream;
-}
-
-static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
-{
- return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
-}
-
-static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
-{
- return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
-}
-
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
-}
-
-static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
- int offset)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
- for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
- pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
-
- ifobj->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
-{
- __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
- __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
-}
-
-static void pkt_stream_receive_half(struct test_spec *test)
-{
- struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
- u32 i;
-
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
- pkt_stream->pkts[0].len);
- pkt_stream = test->ifobj_rx->xsk->pkt_stream;
- for (i = 1; i < pkt_stream->nb_pkts; i += 2)
- pkt_stream->pkts[i].valid = false;
-
- pkt_stream->nb_valid_entries /= 2;
-}
-
-static void pkt_stream_even_odd_sequence(struct test_spec *test)
-{
- struct pkt_stream *pkt_stream;
- u32 i;
-
- for (i = 0; i < test->nb_sockets; i++) {
- pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
- pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
- pkt_stream->pkts[0].len, i, 2);
- test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
-
- pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
- pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
- pkt_stream->pkts[0].len, i, 2);
- test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
- }
-}
-
-static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
-{
- if (!pkt->valid)
- return pkt->offset;
- return pkt->offset + umem_alloc_buffer(umem);
-}
-
-static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
-{
- pkt_stream->current_pkt_nb--;
-}
-
-static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
- u32 pkt_nb, u32 bytes_written)
-{
- void *data = xsk_umem__get_data(umem->buffer, addr);
-
- if (len < MIN_PKT_SIZE)
- return;
-
- if (!bytes_written) {
- gen_eth_hdr(xsk, data);
-
- len -= PKT_HDR_SIZE;
- data += PKT_HDR_SIZE;
- } else {
- bytes_written -= PKT_HDR_SIZE;
- }
-
- write_payload(data, pkt_nb, bytes_written, len);
-}
-
-static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
- u32 nb_frames, bool verbatim)
-{
- u32 i, len = 0, pkt_nb = 0, payload = 0;
- struct pkt_stream *pkt_stream;
-
- pkt_stream = __pkt_stream_alloc(nb_frames);
- if (!pkt_stream)
- exit_with_error(ENOMEM);
-
- for (i = 0; i < nb_frames; i++) {
- struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
- struct pkt *frame = &frames[i];
-
- pkt->offset = frame->offset;
- if (verbatim) {
- *pkt = *frame;
- pkt->pkt_nb = payload;
- if (!frame->valid || !pkt_continues(frame->options))
- payload++;
- } else {
- if (frame->valid)
- len += frame->len;
- if (frame->valid && pkt_continues(frame->options))
- continue;
-
- pkt->pkt_nb = pkt_nb;
- pkt->len = len;
- pkt->valid = frame->valid;
- pkt->options = 0;
-
- len = 0;
- }
-
- print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
- pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
-
- if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
- pkt_stream->max_pkt_len = pkt->len;
-
- if (pkt->valid)
- pkt_stream->nb_valid_entries++;
-
- pkt_nb++;
- }
-
- pkt_stream->nb_pkts = pkt_nb;
- pkt_stream->verbatim = verbatim;
- return pkt_stream;
-}
-
-static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
-{
- struct pkt_stream *pkt_stream;
-
- pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
-
- pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
-}
-
-static void pkt_print_data(u32 *data, u32 cnt)
-{
- u32 i;
-
- for (i = 0; i < cnt; i++) {
- u32 seqnum, pkt_nb;
-
- seqnum = ntohl(*data) & 0xffff;
- pkt_nb = ntohl(*data) >> 16;
- ksft_print_msg("%u:%u ", pkt_nb, seqnum);
- data++;
- }
-}
-
-static void pkt_dump(void *pkt, u32 len, bool eth_header)
-{
- struct ethhdr *ethhdr = pkt;
- u32 i, *data;
-
- if (eth_header) {
- /*extract L2 frame */
- ksft_print_msg("DEBUG>> L2: dst mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- ksft_print_msg("%02X", ethhdr->h_dest[i]);
-
- ksft_print_msg("\nDEBUG>> L2: src mac: ");
- for (i = 0; i < ETH_ALEN; i++)
- ksft_print_msg("%02X", ethhdr->h_source[i]);
-
- data = pkt + PKT_HDR_SIZE;
- } else {
- data = pkt;
- }
-
- /*extract L5 frame */
- ksft_print_msg("\nDEBUG>> L5: seqnum: ");
- pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
- ksft_print_msg("....");
- if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
- ksft_print_msg("\n.... ");
- pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
- PKT_DUMP_NB_TO_PRINT);
- }
- ksft_print_msg("\n---------------------------------------\n");
-}
-
-static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
-{
- u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
- u32 offset = addr % umem->frame_size, expected_offset;
- int pkt_offset = pkt->valid ? pkt->offset : 0;
-
- if (!umem->unaligned_mode)
- pkt_offset = 0;
-
- expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
-
- if (offset == expected_offset)
- return true;
-
- ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
- return false;
-}
-
-static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
-{
- void *data = xsk_umem__get_data(buffer, addr);
- struct xdp_info *meta = data - sizeof(struct xdp_info);
-
- if (meta->count != pkt->pkt_nb) {
- ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
- __func__, pkt->pkt_nb,
- (unsigned long long)meta->count);
- return false;
- }
-
- return true;
-}
-
-static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
- u32 bytes_processed)
-{
- u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
- void *data = xsk_umem__get_data(umem->buffer, addr);
-
- addr -= umem->base_addr;
-
- if (addr >= umem->num_frames * umem->frame_size ||
- addr + len > umem->num_frames * umem->frame_size) {
- ksft_print_msg("Frag invalid addr: %llx len: %u\n",
- (unsigned long long)addr, len);
- return false;
- }
- if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
- ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
- (unsigned long long)addr, len);
- return false;
- }
-
- pkt_data = data;
- if (!bytes_processed) {
- pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
- len -= PKT_HDR_SIZE;
- } else {
- bytes_processed -= PKT_HDR_SIZE;
- }
-
- expected_seqnum = bytes_processed / sizeof(*pkt_data);
- seqnum = ntohl(*pkt_data) & 0xffff;
- pkt_nb = ntohl(*pkt_data) >> 16;
-
- if (expected_pkt_nb != pkt_nb) {
- ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
- __func__, expected_pkt_nb, pkt_nb);
- goto error;
- }
- if (expected_seqnum != seqnum) {
- ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
- __func__, expected_seqnum, seqnum);
- goto error;
- }
-
- words_to_end = len / sizeof(*pkt_data) - 1;
- pkt_data += words_to_end;
- seqnum = ntohl(*pkt_data) & 0xffff;
- expected_seqnum += words_to_end;
- if (expected_seqnum != seqnum) {
- ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
- __func__, expected_seqnum, seqnum);
- goto error;
- }
-
- return true;
-
-error:
- pkt_dump(data, len, !bytes_processed);
- return false;
-}
-
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
-{
- if (pkt->len != len) {
- ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
- __func__, pkt->len, len);
- pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
- return false;
- }
-
- return true;
-}
-
-static int kick_tx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
- if (ret >= 0)
- return TEST_PASS;
- if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
- usleep(100);
- return TEST_PASS;
- }
- return TEST_FAILURE;
-}
-
-static int kick_rx(struct xsk_socket_info *xsk)
-{
- int ret;
-
- ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
- if (ret < 0)
- return TEST_FAILURE;
-
- return TEST_PASS;
-}
-
-static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
-{
- unsigned int rcvd;
- u32 idx;
- int ret;
-
- if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
- ret = kick_tx(xsk);
- if (ret)
- return TEST_FAILURE;
- }
-
- rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
- if (rcvd) {
- if (rcvd > xsk->outstanding_tx) {
- u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
-
- ksft_print_msg("[%s] Too many packets completed\n", __func__);
- ksft_print_msg("Last completion address: %llx\n",
- (unsigned long long)addr);
- return TEST_FAILURE;
- }
-
- xsk_ring_cons__release(&xsk->umem->cq, rcvd);
- xsk->outstanding_tx -= rcvd;
- }
-
- return TEST_PASS;
-}
-
-static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
-{
- u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
- u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
- struct ifobject *ifobj = test->ifobj_rx;
- struct xsk_umem_info *umem = xsk->umem;
- struct pollfd fds = { };
- struct pkt *pkt;
- u64 first_addr = 0;
- int ret;
-
- fds.fd = xsk_socket__fd(xsk->xsk);
- fds.events = POLLIN;
-
- ret = kick_rx(xsk);
- if (ret)
- return TEST_FAILURE;
-
- if (ifobj->use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret < 0)
- return TEST_FAILURE;
-
- if (!ret) {
- if (!is_umem_valid(test->ifobj_tx))
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
- return TEST_CONTINUE;
- }
-
- if (!(fds.revents & POLLIN))
- return TEST_CONTINUE;
- }
-
- rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
- if (!rcvd)
- return TEST_CONTINUE;
-
- if (ifobj->use_fill_ring) {
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- while (ret != rcvd) {
- if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret < 0)
- return TEST_FAILURE;
- }
- ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
- }
- }
-
- while (frags_processed < rcvd) {
- const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
- u64 addr = desc->addr, orig;
-
- orig = xsk_umem__extract_addr(addr);
- addr = xsk_umem__add_offset_to_addr(addr);
-
- if (!nb_frags) {
- pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
- if (!pkt) {
- ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
- __func__, addr, desc->len);
- return TEST_FAILURE;
- }
- }
-
- print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
- addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
-
- if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
- !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
- !is_metadata_correct(pkt, umem->buffer, addr)))
- return TEST_FAILURE;
-
- if (!nb_frags++)
- first_addr = addr;
- frags_processed++;
- pkt_len += desc->len;
- if (ifobj->use_fill_ring)
- *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
-
- if (pkt_continues(desc->options))
- continue;
-
- /* The complete packet has been received */
- if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
- !is_offset_correct(umem, pkt, addr))
- return TEST_FAILURE;
-
- pkt_stream->nb_rx_pkts++;
- nb_frags = 0;
- pkt_len = 0;
- }
-
- if (nb_frags) {
- /* In the middle of a packet. Start over from beginning of packet. */
- idx_rx -= nb_frags;
- xsk_ring_cons__cancel(&xsk->rx, nb_frags);
- if (ifobj->use_fill_ring) {
- idx_fq -= nb_frags;
- xsk_ring_prod__cancel(&umem->fq, nb_frags);
- }
- frags_processed -= nb_frags;
- }
-
- if (ifobj->use_fill_ring)
- xsk_ring_prod__submit(&umem->fq, frags_processed);
- if (ifobj->release_rx)
- xsk_ring_cons__release(&xsk->rx, frags_processed);
-
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight -= pkts_sent;
- pthread_mutex_unlock(&pacing_mutex);
- pkts_sent = 0;
-
- return TEST_CONTINUE;
-}
-
-bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
- unsigned long *bitmap)
-{
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
-
- if (!pkt_stream) {
- __set_bit(sock_num, bitmap);
- return false;
- }
-
- if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
- __set_bit(sock_num, bitmap);
- if (bitmap_full(bitmap, test->nb_sockets))
- return true;
- }
-
- return false;
-}
-
-static int receive_pkts(struct test_spec *test)
-{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- DECLARE_BITMAP(bitmap, test->nb_sockets);
- struct xsk_socket_info *xsk;
- u32 sock_num = 0;
- int res, ret;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
-
- timeradd(&tv_now, &tv_timeout, &tv_end);
-
- while (1) {
- xsk = &test->ifobj_rx->xsk_arr[sock_num];
-
- if ((all_packets_received(test, xsk, sock_num, bitmap)))
- break;
-
- res = __receive_pkts(test, xsk);
- if (!(res == TEST_PASS || res == TEST_CONTINUE))
- return res;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
-
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
- return TEST_FAILURE;
- }
- sock_num = (sock_num + 1) % test->nb_sockets;
- }
-
- return TEST_PASS;
-}
-
-static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
-{
- u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
- struct pkt_stream *pkt_stream = xsk->pkt_stream;
- struct xsk_umem_info *umem = ifobject->umem;
- bool use_poll = ifobject->use_poll;
- struct pollfd fds = { };
- int ret;
-
- buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
- /* pkts_in_flight might be negative if many invalid packets are sent */
- if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
- buffer_len)) {
- ret = kick_tx(xsk);
- if (ret)
- return TEST_FAILURE;
- return TEST_CONTINUE;
- }
-
- fds.fd = xsk_socket__fd(xsk->xsk);
- fds.events = POLLOUT;
-
- while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
- if (use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (timeout) {
- if (ret < 0) {
- ksft_print_msg("ERROR: [%s] Poll error %d\n",
- __func__, errno);
- return TEST_FAILURE;
- }
- if (ret == 0)
- return TEST_PASS;
- break;
- }
- if (ret <= 0) {
- ksft_print_msg("ERROR: [%s] Poll error %d\n",
- __func__, errno);
- return TEST_FAILURE;
- }
- }
-
- complete_pkts(xsk, xsk->batch_size);
- }
-
- for (i = 0; i < xsk->batch_size; i++) {
- struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
- u32 nb_frags_left, nb_frags, bytes_written = 0;
-
- if (!pkt)
- break;
-
- nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
- if (nb_frags > xsk->batch_size - i) {
- pkt_stream_cancel(pkt_stream);
- xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
- break;
- }
- nb_frags_left = nb_frags;
-
- while (nb_frags_left--) {
- struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
-
- tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
- if (pkt_stream->verbatim) {
- tx_desc->len = pkt->len;
- tx_desc->options = pkt->options;
- } else if (nb_frags_left) {
- tx_desc->len = umem->frame_size;
- tx_desc->options = XDP_PKT_CONTD;
- } else {
- tx_desc->len = pkt->len - bytes_written;
- tx_desc->options = 0;
- }
- if (pkt->valid)
- pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
- bytes_written);
- bytes_written += tx_desc->len;
-
- print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
- tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
-
- if (nb_frags_left) {
- i++;
- if (pkt_stream->verbatim)
- pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
- }
- }
-
- if (pkt && pkt->valid) {
- valid_pkts++;
- valid_frags += nb_frags;
- }
- }
-
- pthread_mutex_lock(&pacing_mutex);
- pkts_in_flight += valid_pkts;
- pthread_mutex_unlock(&pacing_mutex);
-
- xsk_ring_prod__submit(&xsk->tx, i);
- xsk->outstanding_tx += valid_frags;
-
- if (use_poll) {
- ret = poll(&fds, 1, POLL_TMOUT);
- if (ret <= 0) {
- if (ret == 0 && timeout)
- return TEST_PASS;
-
- ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
- return TEST_FAILURE;
- }
- }
-
- if (!timeout) {
- if (complete_pkts(xsk, i))
- return TEST_FAILURE;
-
- usleep(10);
- return TEST_PASS;
- }
-
- return TEST_CONTINUE;
-}
-
-static int wait_for_tx_completion(struct xsk_socket_info *xsk)
-{
- struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
- int ret;
-
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- timeradd(&tv_now, &tv_timeout, &tv_end);
-
- while (xsk->outstanding_tx) {
- ret = gettimeofday(&tv_now, NULL);
- if (ret)
- exit_with_error(errno);
- if (timercmp(&tv_now, &tv_end, >)) {
- ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
- return TEST_FAILURE;
- }
-
- complete_pkts(xsk, xsk->batch_size);
- }
-
- return TEST_PASS;
-}
-
-bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
-{
- return bitmap_full(bitmap, test->nb_sockets);
-}
-
-static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
-{
- bool timeout = !is_umem_valid(test->ifobj_rx);
- DECLARE_BITMAP(bitmap, test->nb_sockets);
- u32 i, ret;
-
- while (!(all_packets_sent(test, bitmap))) {
- for (i = 0; i < test->nb_sockets; i++) {
- struct pkt_stream *pkt_stream;
-
- pkt_stream = ifobject->xsk_arr[i].pkt_stream;
- if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
- __set_bit(i, bitmap);
- continue;
- }
- ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
- if (ret == TEST_CONTINUE && !test->fail)
- continue;
-
- if ((ret || test->fail) && !timeout)
- return TEST_FAILURE;
-
- if (ret == TEST_PASS && timeout)
- return ret;
-
- ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
- if (ret)
- return TEST_FAILURE;
- }
- }
-
- return TEST_PASS;
-}
-
-static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
-{
- int fd = xsk_socket__fd(xsk), err;
- socklen_t optlen, expected_len;
-
- optlen = sizeof(*stats);
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
- if (err) {
- ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
- __func__, -err, strerror(-err));
- return TEST_FAILURE;
- }
-
- expected_len = sizeof(struct xdp_statistics);
- if (optlen != expected_len) {
- ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
- __func__, expected_len, optlen);
- return TEST_FAILURE;
- }
-
- return TEST_PASS;
-}
-
-static int validate_rx_dropped(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- /* The receiver calls getsockopt after receiving the last (valid)
- * packet which is not the final packet sent in this test (valid and
- * invalid packets are sent in alternating fashion with the final
- * packet being invalid). Since the last packet may or may not have
- * been dropped already, both outcomes must be allowed.
- */
- if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
- stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_rx_full(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- usleep(1000);
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- if (stats.rx_ring_full)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_fill_empty(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- struct xdp_statistics stats;
- int err;
-
- usleep(1000);
- err = kick_rx(ifobject->xsk);
- if (err)
- return TEST_FAILURE;
-
- err = get_xsk_stats(xsk, &stats);
- if (err)
- return TEST_FAILURE;
-
- if (stats.rx_fill_ring_empty_descs)
- return TEST_PASS;
-
- return TEST_FAILURE;
-}
-
-static int validate_tx_invalid_descs(struct ifobject *ifobject)
-{
- struct xsk_socket *xsk = ifobject->xsk->xsk;
- int fd = xsk_socket__fd(xsk);
- struct xdp_statistics stats;
- socklen_t optlen;
- int err;
-
- optlen = sizeof(stats);
- err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
- if (err) {
- ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
- __func__, -err, strerror(-err));
- return TEST_FAILURE;
- }
-
- if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
- ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
- __func__,
- (unsigned long long)stats.tx_invalid_descs,
- ifobject->xsk->pkt_stream->nb_pkts);
- return TEST_FAILURE;
- }
-
- return TEST_PASS;
-}
-
-static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject,
- struct xsk_umem_info *umem, bool tx)
-{
- int i, ret;
-
- for (i = 0; i < test->nb_sockets; i++) {
- bool shared = (ifobject->shared_umem && tx) ? true : !!i;
- u32 ctr = 0;
-
- while (ctr++ < SOCK_RECONF_CTR) {
- ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem,
- ifobject, shared);
- if (!ret)
- break;
-
- /* Retry if it fails as xsk_socket__create() is asynchronous */
- if (ctr >= SOCK_RECONF_CTR)
- exit_with_error(-ret);
- usleep(USLEEP_MAX);
- }
- if (ifobject->busy_poll)
- enable_busy_poll(&ifobject->xsk_arr[i]);
- }
-}
-
-static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
-{
- xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
- ifobject->xsk = &ifobject->xsk_arr[0];
- ifobject->xskmap = test->ifobj_rx->xskmap;
- memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
- ifobject->umem->base_addr = 0;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
- bool fill_up)
-{
- u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
- u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
- int ret;
-
- if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
- buffers_to_fill = umem->num_frames;
- else
- buffers_to_fill = umem->fill_size;
-
- ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
- if (ret != buffers_to_fill)
- exit_with_error(ENOSPC);
-
- while (filled < buffers_to_fill) {
- struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
- u64 addr;
- u32 i;
-
- for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
- if (!pkt) {
- if (!fill_up)
- break;
- addr = filled * umem->frame_size + umem->base_addr;
- } else if (pkt->offset >= 0) {
- addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
- } else {
- addr = pkt->offset + umem_alloc_buffer(umem);
- }
-
- *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
- if (++filled >= buffers_to_fill)
- break;
- }
- }
- xsk_ring_prod__submit(&umem->fq, filled);
- xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
-
- pkt_stream_reset(pkt_stream);
- umem_reset_alloc(umem);
-}
-
-static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
-{
- u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
- int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
- LIBBPF_OPTS(bpf_xdp_query_opts, opts);
- void *bufs;
- int ret;
- u32 i;
-
- if (ifobject->umem->unaligned_mode)
- mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
-
- if (ifobject->shared_umem)
- umem_sz *= 2;
-
- bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
- if (ret)
- exit_with_error(-ret);
-
- xsk_configure_socket(test, ifobject, ifobject->umem, false);
-
- ifobject->xsk = &ifobject->xsk_arr[0];
-
- if (!ifobject->rx_on)
- return;
-
- xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
-
- for (i = 0; i < test->nb_sockets; i++) {
- ifobject->xsk = &ifobject->xsk_arr[i];
- ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
- if (ret)
- exit_with_error(errno);
- }
-}
-
-static void *worker_testapp_validate_tx(void *arg)
-{
- struct test_spec *test = (struct test_spec *)arg;
- struct ifobject *ifobject = test->ifobj_tx;
- int err;
-
- if (test->current_step == 1) {
- if (!ifobject->shared_umem)
- thread_common_ops(test, ifobject);
- else
- thread_common_ops_tx(test, ifobject);
- }
-
- err = send_pkts(test, ifobject);
-
- if (!err && ifobject->validation_func)
- err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
-
- pthread_exit(NULL);
-}
-
-static void *worker_testapp_validate_rx(void *arg)
-{
- struct test_spec *test = (struct test_spec *)arg;
- struct ifobject *ifobject = test->ifobj_rx;
- int err;
-
- if (test->current_step == 1) {
- thread_common_ops(test, ifobject);
- } else {
- xsk_clear_xskmap(ifobject->xskmap);
- err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
- if (err) {
- ksft_print_msg("Error: Failed to update xskmap, error %s\n",
- strerror(-err));
- exit_with_error(-err);
- }
- }
-
- pthread_barrier_wait(&barr);
-
- err = receive_pkts(test);
-
- if (!err && ifobject->validation_func)
- err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
-
- pthread_exit(NULL);
-}
-
-static u64 ceil_u64(u64 a, u64 b)
-{
- return (a + b - 1) / b;
-}
-
-static void testapp_clean_xsk_umem(struct ifobject *ifobj)
-{
- u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
-
- if (ifobj->shared_umem)
- umem_sz *= 2;
-
- umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- xsk_umem__delete(ifobj->umem->umem);
- munmap(ifobj->umem->buffer, umem_sz);
-}
-
-static void handler(int signum)
-{
- pthread_exit(NULL);
-}
-
-static bool xdp_prog_changed_rx(struct test_spec *test)
-{
- struct ifobject *ifobj = test->ifobj_rx;
-
- return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
-}
-
-static bool xdp_prog_changed_tx(struct test_spec *test)
-{
- struct ifobject *ifobj = test->ifobj_tx;
-
- return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
-}
-
-static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
- struct bpf_map *xskmap, enum test_mode mode)
-{
- int err;
-
- xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
- err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
- if (err) {
- ksft_print_msg("Error attaching XDP program\n");
- exit_with_error(-err);
- }
-
- if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
- if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
- ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
- exit_with_error(EINVAL);
- }
-
- ifobj->xdp_prog = xdp_prog;
- ifobj->xskmap = xskmap;
- ifobj->mode = mode;
-}
-
-static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
- struct ifobject *ifobj_tx)
-{
- if (xdp_prog_changed_rx(test))
- xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
-
- if (!ifobj_tx || ifobj_tx->shared_umem)
- return;
-
- if (xdp_prog_changed_tx(test))
- xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
-}
-
-static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
- struct ifobject *ifobj2)
-{
- pthread_t t0, t1;
- int err;
-
- if (test->mtu > MAX_ETH_PKT_SIZE) {
- if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
- (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
- ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
- return TEST_SKIP;
- }
- if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
- (ifobj2 && !ifobj2->multi_buff_supp))) {
- ksft_test_result_skip("Multi buffer not supported.\n");
- return TEST_SKIP;
- }
- }
- err = test_spec_set_mtu(test, test->mtu);
- if (err) {
- ksft_print_msg("Error, could not set mtu.\n");
- exit_with_error(err);
- }
-
- if (ifobj2) {
- if (pthread_barrier_init(&barr, NULL, 2))
- exit_with_error(errno);
- pkt_stream_reset(ifobj2->xsk->pkt_stream);
- }
-
- test->current_step++;
- pkt_stream_reset(ifobj1->xsk->pkt_stream);
- pkts_in_flight = 0;
-
- signal(SIGUSR1, handler);
- /*Spawn RX thread */
- pthread_create(&t0, NULL, ifobj1->func_ptr, test);
-
- if (ifobj2) {
- pthread_barrier_wait(&barr);
- if (pthread_barrier_destroy(&barr))
- exit_with_error(errno);
-
- /*Spawn TX thread */
- pthread_create(&t1, NULL, ifobj2->func_ptr, test);
-
- pthread_join(t1, NULL);
- }
-
- if (!ifobj2)
- pthread_kill(t0, SIGUSR1);
- else
- pthread_join(t0, NULL);
-
- if (test->total_steps == test->current_step || test->fail) {
- u32 i;
-
- if (ifobj2)
- for (i = 0; i < test->nb_sockets; i++)
- xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
-
- for (i = 0; i < test->nb_sockets; i++)
- xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
-
- testapp_clean_xsk_umem(ifobj1);
- if (ifobj2 && !ifobj2->shared_umem)
- testapp_clean_xsk_umem(ifobj2);
- }
-
- return !!test->fail;
-}
-
-static int testapp_validate_traffic(struct test_spec *test)
-{
- struct ifobject *ifobj_rx = test->ifobj_rx;
- struct ifobject *ifobj_tx = test->ifobj_tx;
-
- if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
- (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
- ksft_test_result_skip("No huge pages present.\n");
- return TEST_SKIP;
- }
-
- if (test->set_ring) {
- if (ifobj_tx->hw_ring_size_supp) {
- if (set_ring_size(ifobj_tx)) {
- ksft_test_result_skip("Failed to change HW ring size.\n");
- return TEST_FAILURE;
- }
- } else {
- ksft_test_result_skip("Changing HW ring size not supported.\n");
- return TEST_SKIP;
- }
- }
-
- xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
- return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
-}
-
-static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
-{
- return __testapp_validate_traffic(test, ifobj, NULL);
-}
-
-static int testapp_teardown(struct test_spec *test)
-{
- int i;
-
- for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
- test_spec_reset(test);
- }
-
- return TEST_PASS;
-}
-
-static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
-{
- thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
- struct ifobject *tmp_ifobj = (*ifobj1);
-
- (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
- (*ifobj2)->func_ptr = tmp_func_ptr;
-
- *ifobj1 = *ifobj2;
- *ifobj2 = tmp_ifobj;
-}
-
-static int testapp_bidirectional(struct test_spec *test)
-{
- int res;
-
- test->ifobj_tx->rx_on = true;
- test->ifobj_rx->tx_on = true;
- test->total_steps = 2;
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
-
- print_verbose("Switching Tx/Rx direction\n");
- swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
-
- swap_directions(&test->ifobj_rx, &test->ifobj_tx);
- return res;
-}
-
-static int swap_xsk_resources(struct test_spec *test)
-{
- int ret;
-
- test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
- test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
- test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
- test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
- test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
- test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
-
- ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
- if (ret)
- return TEST_FAILURE;
-
- return TEST_PASS;
-}
-
-static int testapp_xdp_prog_cleanup(struct test_spec *test)
-{
- test->total_steps = 2;
- test->nb_sockets = 2;
- if (testapp_validate_traffic(test))
- return TEST_FAILURE;
-
- if (swap_xsk_resources(test))
- return TEST_FAILURE;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_headroom(struct test_spec *test)
-{
- test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_dropped(struct test_spec *test)
-{
- if (test->mode == TEST_MODE_ZC) {
- ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
- return TEST_SKIP;
- }
-
- pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
- test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
- XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
- pkt_stream_receive_half(test);
- test->ifobj_rx->validation_func = validate_rx_dropped;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_tx_invalid_descs(struct test_spec *test)
-{
- pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
- test->ifobj_tx->validation_func = validate_tx_invalid_descs;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_rx_full(struct test_spec *test)
-{
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
- test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
- test->ifobj_rx->release_rx = false;
- test->ifobj_rx->validation_func = validate_rx_full;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_stats_fill_empty(struct test_spec *test)
-{
- pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
-
- test->ifobj_rx->use_fill_ring = false;
- test->ifobj_rx->validation_func = validate_fill_empty;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* Let half of the packets straddle a 4K buffer boundary */
- pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_unaligned_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_single_pkt(struct test_spec *test)
-{
- struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
-
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc_mb(struct test_spec *test)
-{
- struct xsk_umem_info *umem = test->ifobj_tx->umem;
- u64 umem_size = umem->num_frames * umem->frame_size;
- struct pkt pkts[] = {
- /* Valid packet for synch to start with */
- {0, MIN_PKT_SIZE, 0, true, 0},
- /* Zero frame len is not legal */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, 0, 0, false, 0},
- /* Invalid address in the second frame */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- /* Invalid len in the middle */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- /* Invalid options in the middle */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
- /* Transmit 2 frags, receive 3 */
- {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
- {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
- /* Middle frame crosses chunk boundary with small length */
- {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
- {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
- /* Valid packet for synch so that something is received */
- {0, MIN_PKT_SIZE, 0, true, 0}};
-
- if (umem->unaligned_mode) {
- /* Crossing a chunk boundary allowed */
- pkts[12].valid = true;
- pkts[13].valid = true;
- }
-
- test->mtu = MAX_ETH_JUMBO_SIZE;
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_invalid_desc(struct test_spec *test)
-{
- struct xsk_umem_info *umem = test->ifobj_tx->umem;
- u64 umem_size = umem->num_frames * umem->frame_size;
- struct pkt pkts[] = {
- /* Zero packet address allowed */
- {0, MIN_PKT_SIZE, 0, true},
- /* Allowed packet */
- {0, MIN_PKT_SIZE, 0, true},
- /* Straddling the start of umem */
- {-2, MIN_PKT_SIZE, 0, false},
- /* Packet too large */
- {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
- /* Up to end of umem allowed */
- {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
- /* After umem ends */
- {umem_size, MIN_PKT_SIZE, 0, false},
- /* Straddle the end of umem */
- {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
- /* Straddle a 4K boundary */
- {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
- /* Straddle a 2K boundary */
- {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
- /* Valid packet for synch so that something is received */
- {0, MIN_PKT_SIZE, 0, true}};
-
- if (umem->unaligned_mode) {
- /* Crossing a page boundary allowed */
- pkts[7].valid = true;
- }
- if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
- /* Crossing a 2K frame size boundary not allowed */
- pkts[8].valid = false;
- }
-
- if (test->ifobj_tx->shared_umem) {
- pkts[4].offset += umem_size;
- pkts[5].offset += umem_size;
- pkts[6].offset += umem_size;
- }
-
- pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_drop(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- pkt_stream_receive_half(test);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_metadata_copy(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
- struct bpf_map *data_map;
- int count = 0;
- int key = 0;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
- skel_tx->progs.xsk_xdp_populate_metadata,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
- test->ifobj_rx->use_metadata = true;
-
- data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
- if (!data_map || !bpf_map__is_internal(data_map)) {
- ksft_print_msg("Error: could not find bss section of XDP program\n");
- return TEST_FAILURE;
- }
-
- if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
- ksft_print_msg("Error: could not update count element\n");
- return TEST_FAILURE;
- }
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_xdp_shared_umem(struct test_spec *test)
-{
- struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
- struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
-
- test->total_steps = 1;
- test->nb_sockets = 2;
-
- test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
- skel_tx->progs.xsk_xdp_shared_umem,
- skel_rx->maps.xsk, skel_tx->maps.xsk);
-
- pkt_stream_even_odd_sequence(test);
-
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_txq_tmout(struct test_spec *test)
-{
- test->ifobj_tx->use_poll = true;
- /* Create invalid frames by setting umem frame_size and pkt length equal to 2048 */
- test->ifobj_tx->umem->frame_size = 2048;
- pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
- return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
-}
-
-static int testapp_poll_rxq_tmout(struct test_spec *test)
-{
- test->ifobj_rx->use_poll = true;
- return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
-}
-
-static int testapp_too_many_frags(struct test_spec *test)
-{
- struct pkt *pkts;
- u32 max_frags, i;
- int ret;
-
- if (test->mode == TEST_MODE_ZC) {
- max_frags = test->ifobj_tx->xdp_zc_max_segs;
- } else {
- max_frags = get_max_skb_frags();
- if (!max_frags) {
- ksft_print_msg("Couldn't retrieve MAX_SKB_FRAGS from system, using default (17) value\n");
- max_frags = 17;
- }
- max_frags += 1;
- }
-
- pkts = calloc(2 * max_frags + 2, sizeof(struct pkt));
- if (!pkts)
- return TEST_FAILURE;
-
- test->mtu = MAX_ETH_JUMBO_SIZE;
-
- /* Valid packet for synch */
- pkts[0].len = MIN_PKT_SIZE;
- pkts[0].valid = true;
-
- /* One valid packet with the max amount of frags */
- for (i = 1; i < max_frags + 1; i++) {
- pkts[i].len = MIN_PKT_SIZE;
- pkts[i].options = XDP_PKT_CONTD;
- pkts[i].valid = true;
- }
- pkts[max_frags].options = 0;
-
- /* An invalid packet with the max amount of frags but signals packet
- * continues on the last frag
- */
- for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
- pkts[i].len = MIN_PKT_SIZE;
- pkts[i].options = XDP_PKT_CONTD;
- pkts[i].valid = false;
- }
-
- /* Valid packet for synch */
- pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
- pkts[2 * max_frags + 1].valid = true;
-
- pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
- ret = testapp_validate_traffic(test);
-
- free(pkts);
- return ret;
-}
-
-static int xsk_load_xdp_programs(struct ifobject *ifobj)
-{
- ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
- if (libbpf_get_error(ifobj->xdp_progs))
- return libbpf_get_error(ifobj->xdp_progs);
-
- return 0;
-}
-
static void xsk_unload_xdp_programs(struct ifobject *ifobj)
{
xsk_xdp_progs__destroy(ifobj->xdp_progs);
}
-/* Simple test */
-static bool hugepages_present(void)
-{
- size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
- void *bufs;
-
- bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
- if (bufs == MAP_FAILED)
- return false;
-
- mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
- munmap(bufs, mmap_sz);
- return true;
-}
-
-static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
-{
- LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
- int err;
-
- ifobj->func_ptr = func_ptr;
-
- err = xsk_load_xdp_programs(ifobj);
- if (err) {
- ksft_print_msg("Error loading XDP program\n");
- exit_with_error(err);
- }
-
- if (hugepages_present())
- ifobj->unaligned_supp = true;
-
- err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
- if (err) {
- ksft_print_msg("Error querying XDP capabilities\n");
- exit_with_error(-err);
- }
- if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
- ifobj->multi_buff_supp = true;
- if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
- if (query_opts.xdp_zc_max_segs > 1) {
- ifobj->multi_buff_zc_supp = true;
- ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
- } else {
- ifobj->xdp_zc_max_segs = 0;
- }
- }
-}
-
-static int testapp_send_receive(struct test_spec *test)
-{
- return testapp_validate_traffic(test);
-}
-
-static int testapp_send_receive_2k_frame(struct test_spec *test)
-{
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_rx(struct test_spec *test)
-{
- test->ifobj_rx->use_poll = true;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_poll_tx(struct test_spec *test)
-{
- test->ifobj_tx->use_poll = true;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_aligned_inv_desc(struct test_spec *test)
-{
- return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
-{
- test->ifobj_tx->umem->frame_size = 2048;
- test->ifobj_rx->umem->frame_size = 2048;
- return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- return testapp_invalid_desc(test);
-}
-
-static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
-{
- u64 page_size, umem_size;
-
- /* Odd frame size so the UMEM doesn't end near a page boundary. */
- test->ifobj_tx->umem->frame_size = 4001;
- test->ifobj_rx->umem->frame_size = 4001;
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- /* This test exists to test descriptors that straddle the end of
- * the UMEM but not a page.
- */
- page_size = sysconf(_SC_PAGESIZE);
- umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
- assert(umem_size % page_size > MIN_PKT_SIZE);
- assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
-
- return testapp_invalid_desc(test);
-}
-
-static int testapp_aligned_inv_desc_mb(struct test_spec *test)
-{
- return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
-{
- test->ifobj_tx->umem->unaligned_mode = true;
- test->ifobj_rx->umem->unaligned_mode = true;
- return testapp_invalid_desc_mb(test);
-}
-
-static int testapp_xdp_metadata(struct test_spec *test)
-{
- return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_xdp_metadata_mb(struct test_spec *test)
-{
- test->mtu = MAX_ETH_JUMBO_SIZE;
- return testapp_xdp_metadata_copy(test);
-}
-
-static int testapp_hw_sw_min_ring_size(struct test_spec *test)
-{
- int ret;
-
- test->set_ring = true;
- test->total_steps = 2;
- test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
- test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
- test->ifobj_tx->xsk->batch_size = 1;
- test->ifobj_rx->xsk->batch_size = 1;
- ret = testapp_validate_traffic(test);
- if (ret)
- return ret;
-
- /* Set batch size to hw_ring_size - 1 */
- test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
- test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
- return testapp_validate_traffic(test);
-}
-
-static int testapp_hw_sw_max_ring_size(struct test_spec *test)
-{
- u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
- int ret;
-
- test->set_ring = true;
- test->total_steps = 2;
- test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
- test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
- test->ifobj_rx->umem->num_frames = max_descs;
- test->ifobj_rx->umem->fill_size = max_descs;
- test->ifobj_rx->umem->comp_size = max_descs;
- test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
- test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
- ret = testapp_validate_traffic(test);
- if (ret)
- return ret;
-
- /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
- * updating the Rx HW tail register.
- */
- test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
- test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
- pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
- return testapp_validate_traffic(test);
-}
-
static void run_pkt_test(struct test_spec *test)
{
int ret;
ret = test->test_func(test);
- if (ret == TEST_PASS)
+ switch (ret) {
+ case TEST_PASS:
ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
test->name);
- pkt_stream_restore_default(test);
-}
-
-static struct ifobject *ifobject_create(void)
-{
- struct ifobject *ifobj;
-
- ifobj = calloc(1, sizeof(struct ifobject));
- if (!ifobj)
- return NULL;
-
- ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
- if (!ifobj->xsk_arr)
- goto out_xsk_arr;
-
- ifobj->umem = calloc(1, sizeof(*ifobj->umem));
- if (!ifobj->umem)
- goto out_umem;
-
- return ifobj;
-
-out_umem:
- free(ifobj->xsk_arr);
-out_xsk_arr:
- free(ifobj);
- return NULL;
-}
+ break;
+ case TEST_SKIP:
+ ksft_test_result_skip("SKIP: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ case TEST_FAILURE:
+ ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ break;
+ default:
+ ksft_test_result_fail("FAIL: %s %s%s -- Unexpected returned value (%d)\n",
+ mode_string(test), busy_poll_string(test), test->name, ret);
+ }
-static void ifobject_delete(struct ifobject *ifobj)
-{
- free(ifobj->umem);
- free(ifobj->xsk_arr);
- free(ifobj);
+ pkt_stream_restore_default(test);
}
static bool is_xdp_supported(int ifindex)
@@ -2587,42 +319,6 @@ static bool is_xdp_supported(int ifindex)
return true;
}
-static const struct test_spec tests[] = {
- {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
- {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
- {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
- {.name = "POLL_RX", .test_func = testapp_poll_rx},
- {.name = "POLL_TX", .test_func = testapp_poll_tx},
- {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
- {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
- {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
- {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
- {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
- {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
- {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
- .test_func = testapp_unaligned_inv_desc_4001_frame},
- {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
- {.name = "TEARDOWN", .test_func = testapp_teardown},
- {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
- {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
- {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
- {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
- {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
- {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
- {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
- {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
- {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
- {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
- {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
- {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
- .test_func = testapp_send_receive_unaligned_mb},
- {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
- {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
- {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
- {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
- {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
- };
-
static void print_tests(void)
{
u32 i;
@@ -2630,10 +326,13 @@ static void print_tests(void)
printf("Tests:\n");
for (i = 0; i < ARRAY_SIZE(tests); i++)
printf("%u: %s\n", i, tests[i].name);
+ for (i = ARRAY_SIZE(tests); i < ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests); i++)
+ printf("%u: %s\n", i, ci_skip_tests[i - ARRAY_SIZE(tests)].name);
}
int main(int argc, char **argv)
{
+ const size_t total_tests = ARRAY_SIZE(tests) + ARRAY_SIZE(ci_skip_tests);
struct pkt_stream *rx_pkt_stream_default;
struct pkt_stream *tx_pkt_stream_default;
struct ifobject *ifobj_tx, *ifobj_rx;
@@ -2661,7 +360,7 @@ int main(int argc, char **argv)
print_tests();
ksft_exit_xpass();
}
- if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+ if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= total_tests) {
ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
ksft_exit_xfail();
}
@@ -2686,10 +385,13 @@ int main(int argc, char **argv)
ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
}
- init_iface(ifobj_rx, worker_testapp_validate_rx);
- init_iface(ifobj_tx, worker_testapp_validate_tx);
+ if (init_iface(ifobj_rx, worker_testapp_validate_rx) ||
+ init_iface(ifobj_tx, worker_testapp_validate_tx)) {
+ ksft_print_msg("Error : can't initialize interfaces\n");
+ ksft_exit_xfail();
+ }
- test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+ test_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
if (!tx_pkt_stream_default || !rx_pkt_stream_default)
@@ -2698,7 +400,7 @@ int main(int argc, char **argv)
test.rx_pkt_stream_default = rx_pkt_stream_default;
if (opt_run_test == RUN_ALL_TESTS)
- nb_tests = ARRAY_SIZE(tests);
+ nb_tests = total_tests;
else
nb_tests = 1;
if (opt_mode == TEST_MODE_ALL) {
@@ -2720,11 +422,15 @@ int main(int argc, char **argv)
if (opt_mode != TEST_MODE_ALL && i != opt_mode)
continue;
- for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ for (j = 0; j < total_tests; j++) {
if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
continue;
- test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ if (j < ARRAY_SIZE(tests))
+ test_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ else
+ test_init(&test, ifobj_tx, ifobj_rx, i,
+ &ci_skip_tests[j - ARRAY_SIZE(tests)]);
run_pkt_test(&test);
usleep(USLEEP_MAX);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index e46e823f6a1a..3ca518df23ad 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -22,166 +22,13 @@
#define PF_XDP AF_XDP
#endif
-#ifndef SO_BUSY_POLL_BUDGET
-#define SO_BUSY_POLL_BUDGET 70
-#endif
-
-#ifndef SO_PREFER_BUSY_POLL
-#define SO_PREFER_BUSY_POLL 69
-#endif
-
-#define TEST_PASS 0
-#define TEST_FAILURE -1
-#define TEST_CONTINUE 1
-#define TEST_SKIP 2
-#define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 16
-#define MAX_TEST_NAME_SIZE 48
#define MAX_TEARDOWN_ITER 10
-#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
-#define MIN_PKT_SIZE 64
-#define MAX_ETH_PKT_SIZE 1518
#define MAX_ETH_JUMBO_SIZE 9000
-#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define DEFAULT_BATCH_SIZE 64
-#define POLL_TMOUT 1000
-#define THREAD_TMOUT 3
-#define DEFAULT_PKT_CNT (4 * 1024)
-#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
#define RX_FULL_RXQSIZE 32
#define UMEM_HEADROOM_TEST_SIZE 128
#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
-#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
-#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
-#define XSK_DESC__INVALID_OPTION (0xffff)
-#define HUGEPAGE_SIZE (2 * 1024 * 1024)
-#define PKT_DUMP_NB_TO_PRINT 16
#define RUN_ALL_TESTS UINT_MAX
#define NUM_MAC_ADDRESSES 4
-#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
-
-enum test_mode {
- TEST_MODE_SKB,
- TEST_MODE_DRV,
- TEST_MODE_ZC,
- TEST_MODE_ALL
-};
-
-struct xsk_umem_info {
- struct xsk_ring_prod fq;
- struct xsk_ring_cons cq;
- struct xsk_umem *umem;
- u64 next_buffer;
- u32 num_frames;
- u32 frame_headroom;
- void *buffer;
- u32 frame_size;
- u32 base_addr;
- u32 fill_size;
- u32 comp_size;
- bool unaligned_mode;
-};
-
-struct xsk_socket_info {
- struct xsk_ring_cons rx;
- struct xsk_ring_prod tx;
- struct xsk_umem_info *umem;
- struct xsk_socket *xsk;
- struct pkt_stream *pkt_stream;
- u32 outstanding_tx;
- u32 rxqsize;
- u32 batch_size;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
-};
-
-struct pkt {
- int offset;
- u32 len;
- u32 pkt_nb;
- bool valid;
- u16 options;
-};
-
-struct pkt_stream {
- u32 nb_pkts;
- u32 current_pkt_nb;
- struct pkt *pkts;
- u32 max_pkt_len;
- u32 nb_rx_pkts;
- u32 nb_valid_entries;
- bool verbatim;
-};
-
-struct set_hw_ring {
- u32 default_tx;
- u32 default_rx;
-};
-
-struct ifobject;
-struct test_spec;
-typedef int (*validation_func_t)(struct ifobject *ifobj);
-typedef void *(*thread_func_t)(void *arg);
-typedef int (*test_func_t)(struct test_spec *test);
-
-struct ifobject {
- char ifname[MAX_INTERFACE_NAME_CHARS];
- struct xsk_socket_info *xsk;
- struct xsk_socket_info *xsk_arr;
- struct xsk_umem_info *umem;
- thread_func_t func_ptr;
- validation_func_t validation_func;
- struct xsk_xdp_progs *xdp_progs;
- struct bpf_map *xskmap;
- struct bpf_program *xdp_prog;
- struct ethtool_ringparam ring;
- struct set_hw_ring set_ring;
- enum test_mode mode;
- int ifindex;
- int mtu;
- u32 bind_flags;
- u32 xdp_zc_max_segs;
- bool tx_on;
- bool rx_on;
- bool use_poll;
- bool busy_poll;
- bool use_fill_ring;
- bool release_rx;
- bool shared_umem;
- bool use_metadata;
- bool unaligned_supp;
- bool multi_buff_supp;
- bool multi_buff_zc_supp;
- bool hw_ring_size_supp;
-};
-
-struct test_spec {
- struct ifobject *ifobj_tx;
- struct ifobject *ifobj_rx;
- struct pkt_stream *tx_pkt_stream_default;
- struct pkt_stream *rx_pkt_stream_default;
- struct bpf_program *xdp_prog_rx;
- struct bpf_program *xdp_prog_tx;
- struct bpf_map *xskmap_rx;
- struct bpf_map *xskmap_tx;
- test_func_t test_func;
- int mtu;
- u16 total_steps;
- u16 current_step;
- u16 nb_sockets;
- bool fail;
- bool set_ring;
- enum test_mode mode;
- char name[MAX_TEST_NAME_SIZE];
-};
-
-pthread_barrier_t barr;
-pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-int pkts_in_flight;
-
-static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
-
#endif /* XSKXCEIVER_H_ */